{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bc6913da-2210-47f4-896b-e3cd507ab10a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "# Relative path (../../final_dfs) of the directory the results file is read from.\n",
    "_FINAL_DFS_DIR = os.path.join('..', '..', 'final_dfs')\n",
    "\n",
    "# plotconfig is imported via the parent directory, so that directory must be\n",
    "# on sys.path before the import statement runs.\n",
    "sys.path.append('..')\n",
    "import plotconfig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5eaedd49-27ef-4219-8787-6529c9f4994f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import ticker\n",
    "from matplotlib.ticker import FuncFormatter\n",
    "from scipy import stats\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cee8b02d-387b-4e57-b394-b6723cda4b4d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read results.parquet from _FINAL_DFS_DIR into a DataFrame using the pyarrow engine.\n",
    "dbf = os.path.join(_FINAL_DFS_DIR, 'results.parquet')\n",
    "df = pd.read_parquet(path=dbf, engine='pyarrow')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "19d9d991-1936-4ae6-92d9-67e799f58a6e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep rows with eeg_name == \"EEG_Raw\" and test_name == \"random\",\n",
    "# leaving out the BestSVR and BestMLP methods.\n",
    "mask = (~df['method_name'].isin([\"BestSVR\", \"BestMLP\"])\n",
    "        & df['eeg_name'].eq(\"EEG_Raw\")\n",
    "        & df['test_name'].eq(\"random\"))\n",
    "\n",
    "df = df.loc[mask]\n",
    "\n",
    "# Show which methods remain after filtering.\n",
    "df['method_name'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba5dc4f4-7443-4ba1-b0d6-ce13c59c0062",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Two panels: ax1 (wide) holds the curves over training_size, ax2 (narrow) the\n",
    "# per-method spread at a single training size.\n",
    "fig, (ax1, ax2) = plt.subplots(1, 2,\n",
    "                               figsize=(10, 8),\n",
    "                               gridspec_kw={'width_ratios': [4, 1]})\n",
    "\n",
    "# --- Left panel: one line per method, error band of one standard error ---\n",
    "sns.lineplot(data=df,\n",
    "             x='training_size',\n",
    "             y='pearsonr_statistic',\n",
    "             hue='method_name',\n",
    "             errorbar=('se', 1),\n",
    "             ax=ax1)\n",
    "\n",
    "# Horizontal reference line at the level defined in plotconfig.\n",
    "ax1.axhline(y=plotconfig.IR_RANDOM_PEARSON_STATISTIC, color='r', linestyle='--', alpha=0.5, label='Theoretical Random')\n",
    "\n",
    "# Relabel legend entries by name rather than by position, so that a different\n",
    "# method order in the data cannot attach a display name to the wrong line.\n",
    "# Entries without a mapping keep their original label.\n",
    "legend_display_names = {\n",
    "    'LinearRegression': 'Linear Regression',\n",
    "    'Shuffle_LinearRegression': 'Shuffle Linear Regression',\n",
    "    'DummyScoring_Mean': 'Mean Dummy Regressor',\n",
    "    'SVR': 'SVR',\n",
    "    'MLP': 'MLP',\n",
    "    'Theoretical Random': 'Random Retrieval',\n",
    "}\n",
    "handles, labels = ax1.get_legend_handles_labels()\n",
    "new_labels = [legend_display_names.get(label, label) for label in labels]\n",
    "\n",
    "# ax1.legend() replaces the legend that seaborn created automatically.\n",
    "ax1.legend(handles, new_labels,\n",
    "           bbox_to_anchor=(0.25, 0.85),  # x, y coordinates inside the plot\n",
    "           loc='center',                 # center the legend box at those coordinates\n",
    "           fontsize=plotconfig.LEGEND_FONTSIZE)\n",
    "\n",
    "ax1.set_title(\"N vs Pearson Correlation (± ste)\", fontsize=plotconfig.LABEL_FONTSIZE)\n",
    "ax1.set_xlabel(\"Number of stimuli-response pairs observed (N)\", fontsize=plotconfig.LABEL_FONTSIZE)\n",
    "ax1.set_ylabel(\"Pearson Correlation (r)\", fontsize=plotconfig.LABEL_FONTSIZE)\n",
    "\n",
    "# Add an x tick at the largest training size present in the data.\n",
    "current_xticks = ax1.get_xticks()\n",
    "new_xticks = np.sort(np.append(current_xticks, np.max(df['training_size'])))\n",
    "ax1.set_xticks(new_xticks)\n",
    "\n",
    "# Add a y tick and a dashed guide line at the lowest per-training-size mean of\n",
    "# LinearRegression.\n",
    "current_yticks = ax1.get_yticks()\n",
    "min_value = df[df['method_name'] == 'LinearRegression'].groupby('training_size')['pearsonr_statistic'].mean().min()\n",
    "# Drop the automatic -0.8 tick (presumably because its label would crowd the\n",
    "# min_value tick added below). Tick locations are floats, so compare with a\n",
    "# tolerance: an exact `!= -0.8` misses values such as -0.8000000000000002.\n",
    "current_yticks = current_yticks[~np.isclose(current_yticks, -0.8)]\n",
    "new_yticks = np.sort(np.append(current_yticks, min_value))\n",
    "ax1.set_yticks(new_yticks)\n",
    "ax1.axhline(y=min_value, color='k', linestyle='--', alpha=0.5)\n",
    "\n",
    "def custom_formatter(x, pos):\n",
    "    \"\"\"Format a y tick: two decimals for the tick at min_value, one decimal otherwise.\"\"\"\n",
    "    # Compare against the computed min_value instead of a hard-coded -0.773 so the\n",
    "    # extra tick keeps its two-decimal label when the data changes.\n",
    "    if np.isclose(x, min_value, rtol=1e-03, atol=1e-03):\n",
    "        return f\"{x:.2f}\"\n",
    "    else:\n",
    "        return f\"{x:.1f} \"\n",
    "\n",
    "\n",
    "ax1.xaxis.set_major_formatter(ticker.FormatStrFormatter('%d'))\n",
    "ax1.yaxis.set_major_formatter(FuncFormatter(custom_formatter))\n",
    "\n",
    "ax1.tick_params(axis='x', labelsize=plotconfig.TICK_SIZE)\n",
    "ax1.tick_params(axis='y', labelsize=plotconfig.TICK_SIZE)\n",
    "\n",
    "ylim = [-1, 0.2]\n",
    "ax1.set_xlim(-100, 9300)\n",
    "ax1.set_ylim(ylim)\n",
    "ax1.invert_yaxis()  # flip the y axis so that -1 is drawn at the top\n",
    "\n",
    "# --- Right panel: per-method mean with standard-deviation bars at training_size == plotconfig.LAST_N ---\n",
    "df_last = df[df['training_size'] == plotconfig.LAST_N]\n",
    "\n",
    "# Pass ax=ax2 explicitly instead of relying on ax2 being pyplot's current axes.\n",
    "sns.pointplot(x='method_name', y='pearsonr_statistic', hue='method_name', data=df_last,\n",
    "              capsize=0.5, linestyle='none', errorbar='sd', ax=ax2)\n",
    "\n",
    "ax2.set_xlabel('')\n",
    "ax2.set_ylabel('')\n",
    "ax2.set_title('(±std)', fontsize=plotconfig.LABEL_FONTSIZE)\n",
    "ax2.tick_params(axis='x', which='both', bottom=False, labelbottom=False)  # Remove x-ticks and labels\n",
    "ax2.tick_params(axis='y', which='both', left=False, labelleft=False)  # Remove y-ticks and labels\n",
    "\n",
    "# Same y range and orientation as the left panel.\n",
    "ax2.set_ylim(ylim)\n",
    "ax2.invert_yaxis()\n",
    "\n",
    "plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)\n",
    "\n",
    "plotconfig.save_fig(\"correlation\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b7af682-4a55-41c3-bb85-7f5402187ef4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Close pyplot's current figure.\n",
    "plt.close() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ee6755f-0c95-42f0-9adf-c3fa08058553",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the rows whose training_size equals plotconfig.N_FOR_PERF_SCORE_COMPARISON.\n",
    "# NOTE: df is overwritten here, so cells that need every training size must run\n",
    "# before this one; re-run the notebook from the loading cell to restore them.\n",
    "mask = df['training_size'].eq(plotconfig.N_FOR_PERF_SCORE_COMPARISON)\n",
    "df = df.loc[mask]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1215e8d2-2699-4d44-9504-c4106043f824",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Left-to-right order of the bars.\n",
    "order = ['DummyScoring_Mean',\n",
    "         'Shuffle_LinearRegression',\n",
    "         'LinearRegression',\n",
    "         'SVR',\n",
    "         'MLP']\n",
    "\n",
    "# One bar per method with standard-deviation error bars. No ax is passed, so\n",
    "# barplot draws on pyplot's current axes and returns it.\n",
    "ax = sns.barplot(data=df,\n",
    "                 x='method_name',\n",
    "                 y='pearsonr_statistic',\n",
    "                 hue='method_name',\n",
    "                 order=order,\n",
    "                 errorbar='sd')\n",
    "\n",
    "# Rotate the method names and right-align them.\n",
    "plt.setp(ax.get_xticklabels(), rotation=45, ha='right')\n",
    "\n",
    "# Flip the y axis.\n",
    "ax.invert_yaxis()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4367cdf-6cd1-45f3-9aa7-8c748c20d8dd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
