{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3cbafc80-2116-4b7a-b76e-efe6957899e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "# Relative path of the directory the results table is read from.\n",
    "_FINAL_DFS_DIR = os.path.join('..', '..', 'final_dfs')\n",
    "\n",
    "# The parent directory has to be on sys.path before plotconfig is imported\n",
    "# (presumably that is where plotconfig.py lives -- confirm).\n",
    "sys.path.append('..')\n",
    "import plotconfig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2995f345-551f-4fd8-9518-56e70580acc6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import ticker\n",
    "from matplotlib.ticker import FuncFormatter\n",
    "from scipy import stats\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30b103c5-0cc8-4e8a-b7d2-b705f9062955",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the results table from the final_dfs directory.\n",
    "results_path = os.path.join(_FINAL_DFS_DIR, 'results.parquet')\n",
    "df = pd.read_parquet(results_path, engine='pyarrow')\n",
    "\n",
    "# Row filters: drop the BestSVR / BestMLP methods and keep only the rows\n",
    "# whose eeg_name is EEG_Raw and whose test_name is random.\n",
    "not_best_svr = df['method_name'] != \"BestSVR\"\n",
    "not_best_mlp = df['method_name'] != \"BestMLP\"\n",
    "is_raw_eeg = df['eeg_name'] == \"EEG_Raw\"\n",
    "is_random_test = df['test_name'] == \"random\"\n",
    "\n",
    "df = df[not_best_svr & not_best_mlp & is_raw_eeg & is_random_test]\n",
    "\n",
    "# Display the method names that remain after filtering.\n",
    "df['method_name'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6e074c0-a8b0-4af4-ae26-97c0b1682e1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Two panels side by side: the curves over training size (wide, left) and\n",
    "# the per-method spread at a single training size (narrow, right).\n",
    "fig, (ax1, ax2) = plt.subplots(1, 2, \n",
    "                               figsize=(10, 8), \n",
    "                               gridspec_kw={'width_ratios': [4, 1]})\n",
    "\n",
    "# Left panel: one line per method with a +/- 1 standard-error band.\n",
    "sns.lineplot(data=df, \n",
    "             x='training_size', \n",
    "             y='euclidean_at_top_rank', \n",
    "             hue='method_name', \n",
    "             errorbar=('se', 1),\n",
    "             ax=ax1)\n",
    "\n",
    "# Horizontal reference line at the value stored in plotconfig.IR_RANDOM_EUCLIDEAN.\n",
    "ax1.axhline(y=plotconfig.IR_RANDOM_EUCLIDEAN, color='r', linestyle='--', alpha=0.5, label='Theoretical Random')\n",
    "\n",
    "# Rebuild the legend with readable names. Each label is looked up by the raw\n",
    "# series name instead of by position, so a change in the order or number of\n",
    "# methods in df cannot silently attach the wrong name to a line. Series that\n",
    "# are not listed here keep their raw label.\n",
    "display_names = {\n",
    "    'LinearRegression': 'Linear Regression',\n",
    "    'Shuffle_LinearRegression': 'Shuffle Linear Regression',\n",
    "    'DummyScoring_Mean': 'Mean Dummy Regressor',\n",
    "    'SVR': 'SVR',\n",
    "    'MLP': 'MLP',\n",
    "    'Theoretical Random': 'Theoretical Random Retrieval',\n",
    "}\n",
    "ax1.get_legend().remove()\n",
    "handles, labels = ax1.get_legend_handles_labels()\n",
    "new_labels = [display_names.get(label, label) for label in labels]\n",
    "ax1.legend(handles, new_labels, \n",
    "           bbox_to_anchor=(0.65, 0.5),  # x, y coordinates inside the plot\n",
    "           loc='center',               # Center the legend box at the specified coordinates\n",
    "           fontsize=plotconfig.LEGEND_FONTSIZE)\n",
    "\n",
    "ax1.set_title(\"N vs Euclidean at Top Rank (±ste)\", fontsize=plotconfig.LABEL_FONTSIZE)\n",
    "ax1.set_xlabel(\"Number of stimuli-response pairs observed (N)\", fontsize=plotconfig.LABEL_FONTSIZE)\n",
    "ax1.set_ylabel(\"Euclidean at Top Rank\", fontsize=plotconfig.LABEL_FONTSIZE)\n",
    "\n",
    "# Add the largest training size to the automatically chosen x ticks.\n",
    "current_xticks = ax1.get_xticks()\n",
    "new_xticks = np.sort(np.append(current_xticks, np.max(df['training_size'])))\n",
    "ax1.set_xticks(new_xticks)\n",
    "\n",
    "# Lowest per-training-size mean reached by LinearRegression: add it as an\n",
    "# extra y tick and mark it with a dashed horizontal line.\n",
    "current_yticks = ax1.get_yticks()\n",
    "min_value = df[df['method_name'] == 'LinearRegression'].groupby('training_size')['euclidean_at_top_rank'].mean().min()\n",
    "new_yticks = np.sort(np.append(current_yticks, min_value))\n",
    "ax1.set_yticks(new_yticks)\n",
    "ax1.axhline(y=min_value, color='k', linestyle='--', alpha=0.5)\n",
    "\n",
    "ax1.tick_params(axis='x', labelsize=plotconfig.TICK_SIZE)\n",
    "ax1.tick_params(axis='y', labelsize=plotconfig.TICK_SIZE)\n",
    "\n",
    "# Fixed axis ranges; the y range is shared with the right panel.\n",
    "ylim = [-0.5, 26]\n",
    "ax1.set_xlim(-100, 9300)  # x-axis limits: -100 to 9300\n",
    "ax1.set_ylim(ylim)  # y-axis limits: -0.5 to 26\n",
    "ax1.invert_yaxis()  # flip the axis so smaller values are drawn towards the top\n",
    "\n",
    "\n",
    "# Right panel: per-method mean +/- standard deviation at one training size.\n",
    "# NOTE(review): 9000 looks like the largest training size in df -- confirm.\n",
    "FINAL_TRAINING_SIZE = 9000\n",
    "df_last = df[df['training_size'] == FINAL_TRAINING_SIZE]\n",
    "\n",
    "# Target ax2 explicitly rather than relying on it being the current pyplot axes.\n",
    "sns.pointplot(x='method_name', y='euclidean_at_top_rank', hue='method_name', data=df_last, \n",
    "              capsize=0.5, linestyle='none', errorbar='sd', ax=ax2)\n",
    "\n",
    "ax2.set_xlabel('')\n",
    "ax2.set_ylabel('')\n",
    "ax2.set_title('(±std)', fontsize=plotconfig.LABEL_FONTSIZE)\n",
    "ax2.tick_params(axis='x', which='both', bottom=False, labelbottom=False)  # Remove x-ticks and labels\n",
    "ax2.tick_params(axis='y', which='both', left=False, labelleft=False)  # Remove y-ticks and labels\n",
    "\n",
    "# Same inverted y range as the left panel so both panels share one scale.\n",
    "ax2.set_ylim(ylim)\n",
    "ax2.invert_yaxis()\n",
    "\n",
    "plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)\n",
    "\n",
    "\n",
    "# Hand the figure to the project helper under the name euclidean_top_rank.\n",
    "plotconfig.save_fig(\"euclidean_top_rank\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e53a4436-0be2-4e95-bea8-4aa45b9fe04f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the rows at the training size given by\n",
    "# plotconfig.N_FOR_PERF_SCORE_COMPARISON. This rebinds df, so every cell run\n",
    "# after this one sees just that single training size.\n",
    "comparison_size = plotconfig.N_FOR_PERF_SCORE_COMPARISON\n",
    "df = df[df['training_size'] == comparison_size]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6db78686-2b4d-4dd5-9f7d-17f597fbcc92",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Left-to-right order of the bars.\n",
    "method_order = [\n",
    "    'DummyScoring_Mean',\n",
    "    'Shuffle_LinearRegression',\n",
    "    'LinearRegression',\n",
    "    'SVR',\n",
    "    'MLP',\n",
    "]\n",
    "\n",
    "# One bar per method, with standard-deviation error bars.\n",
    "sns.barplot(\n",
    "    data=df,\n",
    "    x='method_name',\n",
    "    y='euclidean_at_top_rank',\n",
    "    hue='method_name',\n",
    "    order=method_order,\n",
    "    errorbar='sd',\n",
    ")\n",
    "\n",
    "# Slant the method names so they do not overlap; the y-axis is left\n",
    "# un-inverted in this plot.\n",
    "plt.xticks(rotation=45, ha='right')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "97856837-c7c2-4a9b-aef3-e2ebcb251281",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
