{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9adee767-0fe0-40c8-af36-5cea1a42bca2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# adding tools directory to path, so we can access the utils easily\n",
    "import sys\n",
    "root_path = os.path.join('.', 'tools')\n",
    "sys.path.append(root_path)\n",
    "\n",
    "import file_tools\n",
    "_EXP_DIR = os.path.join('.', 'experiments')\n",
    "_DATASET_DIR = os.path.join(_EXP_DIR, 'datasets')\n",
    "\n",
    "_RESULTS_DIR = os.path.join(_EXP_DIR, 'results')\n",
    "file_tools.ensure_dir(_RESULTS_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "06fd4bd5-9440-4131-9d57-b767eae74455",
   "metadata": {},
   "outputs": [],
   "source": [
    "import dataset_tools\n",
    "\n",
    "import numpy as np\n",
    "from scipy.spatial import distance\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "X = dataset_tools.eeg_raw\n",
    "y = [distance.euclidean(row1, row2) for row1, row2 in zip(dataset_tools.observed_faces, dataset_tools.target_faces)]\n",
    "scaler = StandardScaler()\n",
    "y = scaler.fit_transform(np.array(y).reshape(-1, 1)).flatten().tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a396864e-cd8c-4aad-b2f3-2aa4f21e323d",
   "metadata": {},
   "outputs": [],
   "source": [
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b74c09ea-8954-43ac-a4b3-d1fad43cdef5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import umap\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "reducer = umap.UMAP(n_components=10, n_neighbors=50, metric='euclidean', n_jobs=-1)\n",
    "embedding = reducer.fit_transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a12480cf-faa4-4106-bd80-24da2e588bf2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.model_selection import cross_validate\n",
    "from sklearn.model_selection import ShuffleSplit\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "estimator = Pipeline([\n",
    "    ('scaler_x', StandardScaler()),\n",
    "    ('svr', LinearRegression())\n",
    "])\n",
    "\n",
    "ss = ShuffleSplit(n_splits=10, test_size=0.1)\n",
    "scorings=['neg_root_mean_squared_error']\n",
    "\n",
    "raw_scores = cross_validate(estimator, embedding, y, cv=ss, scoring=scorings, n_jobs=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c780384-a67e-47fa-a2df-0295add35d6b",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.mean(raw_scores['test_neg_root_mean_squared_error'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "97b57a39-d0e2-4ee5-997c-18fc755ef45f",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.scatter(embedding[:, 0], embedding[:, 1], c=y)\n",
    "plt.colorbar()\n",
    "plt.title('UMAP projection of random 10-dimensional data')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "25cf3c83-5b5c-42c2-92e7-1887d00a6def",
   "metadata": {},
   "outputs": [],
   "source": [
    "indices = [3, 5, 26, 32, 34, 55, 61, 63, 84, 90, 92, 113, 119, 121, 142, 148, 150, 171, 177, 179, 200]\n",
    "# indices = [26, 55, 84, 113, 142, 171, 200] # Pz\n",
    "# indices = [90, 92, 113, 119, 121, 142]\n",
    "\n",
    "# indices = [1, 3, 5, 10, 26, 28, 30, 32, 34, 39, 55, 57, 59, 61, 63, 68, 84, 86, 88, 90, 92, 97, 113, 115, 117, 119, 121, 126, 142, 144, 146, 148, 150, 155, 171, 173, 175, 177, 179, 184, 200, 202]\n",
    "\n",
    "# indices = [1, 3, 5, 10, 21, 26, 28, 30, 32, 34, 39, 50, 55, 57, 59, 61, 63, 68, 79, 84, 86, 88, 90, 92, 97, 108, 113, 115, 117, 119, 121, 126, 137, 142, 144, 146, 148, 150, 155, 166, 171, 173, 175, 177, 179, 184, 195, 200, 202]\n",
    "\n",
    "X2 = X[:, indices]\n",
    "\n",
    "estimator = Pipeline([\n",
    "    ('scaler_x', StandardScaler()),\n",
    "    ('svr', LinearRegression())\n",
    "])\n",
    "\n",
    "ss = ShuffleSplit(n_splits=10, test_size=0.1)\n",
    "scorings=['neg_root_mean_squared_error']\n",
    "\n",
    "scores2 = cross_validate(estimator, X2, y, cv=ss, scoring=scorings, n_jobs=-1)\n",
    "print(np.mean(scores2['test_neg_root_mean_squared_error']))\n",
    "print(X2.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80a3eb61-9eaa-4938-9787-22b2d89c4141",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.decomposition import PCA\n",
    "\n",
    "m = []\n",
    "t = []\n",
    "index = [2, 5, 10, 25, 50, 100, 200]\n",
    "for i in index:\n",
    "    pca = PCA(n_components=i)\n",
    "    principalComponents = pca.fit_transform(X)\n",
    "\n",
    "    tmp = []\n",
    "    tmpt = []\n",
    "    for j in range(10):\n",
    "        scores3 = cross_validate(estimator, principalComponents, y, cv=ss, scoring=scorings, n_jobs=-1)\n",
    "        mean_score3 = np.mean(scores3['test_neg_root_mean_squared_error'])\n",
    "        tmp.append(mean_score3)\n",
    "        tmpt.append(np.mean(scores3['fit_time']))\n",
    "    \n",
    "    m.append(np.mean(tmp))\n",
    "    t.append(np.mean(tmpt))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0afe7fba-2a2a-442c-b66f-039e395577a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(index, m)\n",
    "plt.scatter(21, -0.94)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f125d4fe-f368-4cb9-ab4e-3a161957a52e",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(index, t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6bce85f-65e8-4697-b8b7-46dc690729a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = estimator.fit(X2, y).predict(X2)\n",
    "\n",
    "plt.scatter(y, y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "70f583a6-589a-4f42-860a-8b21230daccf",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.model_selection import RandomizedSearchCV\n",
    "from sklearn.svm import SVR\n",
    "from sklearn.neural_network import MLPRegressor\n",
    "from sklearn.model_selection import ShuffleSplit\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.metrics import make_scorer, mean_squared_error\n",
    "\n",
    "estimator = Pipeline([\n",
    "    ('scaler_x', StandardScaler()),\n",
    "    ('svr', SVR())\n",
    "])\n",
    "\n",
    "param_grid = {\n",
    "    'svr__C': [0.01, 0.1],\n",
    "    'svr__gamma': ['scale', 'auto'],\n",
    "    'svr__kernel': ['linear', 'rbf']\n",
    "}\n",
    "# best svr\n",
    "# {'svr__C': 0.01, 'svr__gamma': 'scale', 'svr__kernel': 'linear'}\n",
    "\n",
    "\n",
    "# estimator = Pipeline([\n",
    "#     ('scaler_x', StandardScaler()),\n",
    "#     ('mlp', MLPRegressor(max_iter=1000))\n",
    "# ])\n",
    "\n",
    "# param_grid = {\n",
    "#     'mlp__hidden_layer_sizes': [2*(100,)],\n",
    "#     'mlp__activation': ['identity', 'relu'],\n",
    "#     'mlp__alpha': [0.1],\n",
    "#     'mlp__learning_rate': ['adaptive']\n",
    "# }\n",
    "\n",
    "# best mlp\n",
    "# {'mlp__activation': 'identity',\n",
    "#  'mlp__alpha': 0.1,\n",
    "#  'mlp__hidden_layer_sizes': (100, 100),\n",
    "#  'mlp__learning_rate': 'adaptive'}\n",
    "\n",
    "def rmse(y_true, y_pred):\n",
    "    return np.sqrt(mean_squared_error(y_true, y_pred))\n",
    "\n",
    "rmse_scorer = make_scorer(rmse, greater_is_better=False)\n",
    "\n",
    "n_splits = 10\n",
    "test_size = 0.1\n",
    "ss = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=42)\n",
    "\n",
    "# GridSearchCV\n",
    "grid_search = GridSearchCV(estimator, \n",
    "                            param_grid,\n",
    "                            cv=ss,    \n",
    "                            scoring=rmse_scorer,\n",
    "                            n_jobs=-1,\n",
    "                            verbose=2)\n",
    "\n",
    "grid_search.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "631223f8-3d9b-40e5-9326-279428ef82d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Convert cv_results_ to a DataFrame\n",
    "results_df = pd.DataFrame(grid_search.cv_results_)\n",
    "\n",
    "# Save to CSV\n",
    "results_df.to_csv('grid_search_results.csv', index=False)\n",
    "\n",
    "import pickle\n",
    "\n",
    "# Save the entire GridSearchCV object\n",
    "with open('grid_search_results.pkl', 'wb') as f:\n",
    "    pickle.dump(grid_search, f)\n",
    "\n",
    "# To load:\n",
    "# with open('grid_search_results.pkl', 'rb') as f:\n",
    "#     loaded_grid_search = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9181d496-fb96-4b91-9e41-bf17f1506e11",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "results = pd.DataFrame(grid_search.cv_results_)\n",
    "plt.plot(results['mean_test_score'])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "56094d05-e0f2-47fe-b343-7e8eb6c0c88f",
   "metadata": {},
   "outputs": [],
   "source": [
    "grid_search.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb8ec999-eb70-4f30-bb66-70b541ab706b",
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = grid_search.predict(X)\n",
    "plt.scatter(y, y_pred)\n",
    "plt.plot([-2, 2], [-2, 2], 'r')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b79a2345-4ab6-4757-9655-8a30754560ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "grid_search.cv_results_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a18beb02-f0e7-4b2c-b606-8a2261a5899b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from skopt import BayesSearchCV\n",
    "from skopt.space import Real, Categorical, Integer\n",
    "\n",
    "from sklearn.datasets import load_diabetes\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X, y = load_diabetes(return_X_y=True)\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=0)\n",
    "\n",
    "def on_step(optim_result):\n",
    "    print(f\"Best score: {optim_result.fun}\")\n",
    "\n",
    "from sklearn.metrics import make_scorer, mean_squared_error\n",
    "from sklearn.model_selection import cross_val_score\n",
    "import numpy as np\n",
    "\n",
    "def rmse(y_true, y_pred):\n",
    "    return np.sqrt(mean_squared_error(y_true, y_pred))\n",
    "\n",
    "rmse_scorer = make_scorer(rmse, greater_is_better=False)\n",
    "\n",
    "estimator = Pipeline([\n",
    "    ('scaler_x', StandardScaler()),\n",
    "    ('svr', TransformedTargetRegressor(\n",
    "        regressor=SVR(),\n",
    "        transformer=StandardScaler()\n",
    "    ))\n",
    "])\n",
    "\n",
    "search_spaces = {\n",
    "    'svr__regressor__C': Real(1e-6, 1e+6, prior='log-uniform'),\n",
    "    'svr__regressor__gamma': Real(1e-6, 1e+1, prior='log-uniform'),\n",
    "    'svr__regressor__kernel': Categorical(['linear', 'rbf'])\n",
    "}\n",
    "\n",
    "opt = BayesSearchCV(\n",
    "    estimator,\n",
    "    search_spaces,\n",
    "    scoring=rmse_scorer,\n",
    "    n_iter=50,\n",
    "    n_jobs=-1,\n",
    "    random_state=0\n",
    ")\n",
    "\n",
    "# executes bayesian optimization\n",
    "_ = opt.fit(X_train, y_train, callback=on_step)\n",
    "\n",
    "# model can be saved, used for predictions or scoring\n",
    "print(opt.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "38eaa6d6-4b90-42a3-aee5-57612f51a41f",
   "metadata": {},
   "outputs": [],
   "source": [
    "results = opt.optimizer_results_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ad494ec-9e94-4bf3-a7ae-3b83f325abe6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from skopt.plots import plot_objective\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Plot for each optimization run\n",
    "for i, res in enumerate(results):\n",
    "    _ = plot_objective(res)\n",
    "    plt.title(f\"Objective Plot for Run {i+1}\")\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21bdcd28-93b3-4704-b2bf-d2f9fca80c28",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
