{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c874a13e-fc57-4400-8a5d-4fbdeb58ce82",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "# Data locations, relative to the directory the notebook is run from.\n",
    "_FINAL_DFS_DIR = os.path.join('..', '..', 'final_dfs')\n",
    "_CSV_DIR = os.path.join('..', '..', 'csv_files')\n",
    "\n",
    "# Make the parent directory importable before importing plotconfig (presumably\n",
    "# where it lives - confirm). The guard keeps sys.path from growing by one entry\n",
    "# every time this cell is re-run.\n",
    "if '..' not in sys.path:\n",
    "    sys.path.append('..')\n",
    "import plotconfig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99956c2f-2ad5-459b-9ff3-290eaeb9e668",
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import ticker\n",
    "from matplotlib.ticker import FuncFormatter\n",
    "from scipy import stats\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c45dc743-ba5c-4f71-8af6-1f27dc4bf5dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "dbf = os.path.join(_FINAL_DFS_DIR, 'results.parquet')\n",
    "df = pd.read_parquet(dbf, engine='pyarrow')\n",
    "\n",
    "# Select the single configuration that the later cells compare with the human data.\n",
    "mask = (\n",
    "    (df['method_name'] == 'LinearRegression')\n",
    "    & (df['eeg_name'] == 'EEG_Raw')\n",
    "    & (df['training_size'] == plotconfig.N_FOR_PERF_SCORE_COMPARISON)\n",
    "    & (df['test_name'] == 'diagonal')\n",
    ")\n",
    "\n",
    "# Explicit copy: df_algorithm is an independent frame, so modifying it later can\n",
    "# never raise SettingWithCopyWarning or touch the full table.\n",
    "df_algorithm = df[mask].copy()\n",
    "\n",
    "# Fail here with a clear message; an empty selection would otherwise only show\n",
    "# up as an obscure error in the aggregation / plotting cells.\n",
    "if df_algorithm.empty:\n",
    "    raise ValueError(f'No rows in {dbf} match the LinearRegression / EEG_Raw / diagonal filter')\n",
    "\n",
    "df_algorithm['method_name'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b384cf81-bf2c-481b-b92a-3286608341e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the user-evaluation CSV into df_human.\n",
    "humanfile = os.path.join(\n",
    "    _CSV_DIR,\n",
    "    'image-distance-user-evaluations.csv',\n",
    ")\n",
    "df_human = pd.read_csv(filepath_or_buffer=humanfile)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d7fd9075-f3da-4abb-a69a-11d875a16cdf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rescale the human 'distance' column by the largest true distance found in the\n",
    "# algorithm results, then round to one decimal so it can be used as a group key.\n",
    "# NOTE(review): assumes 'distance' is a fraction of that maximum - confirm.\n",
    "max_true_distance = df_algorithm['true_distances'].explode().max()\n",
    "df_human['round_d'] = (df_human['distance'] * max_true_distance).round(1)\n",
    "\n",
    "# Mean / std / sem of the human evaluation per rounded distance, with flat\n",
    "# column names (distance_eval_mean, distance_eval_std, distance_eval_sem).\n",
    "grouped_df = df_human.groupby(['round_d'])\n",
    "compiled_result = grouped_df.agg(\n",
    "    distance_eval_mean=('distance_eval', 'mean'),\n",
    "    distance_eval_std=('distance_eval', 'std'),\n",
    "    distance_eval_sem=('distance_eval', 'sem'),\n",
    ").reset_index()\n",
    "human_results = compiled_result\n",
    "human_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d75335b-02a3-43d6-81cc-775d4bdc8263",
   "metadata": {},
   "outputs": [],
   "source": [
    "# groupby() drops NaN keys when human_results is built, so drop them here as\n",
    "# well; a NaN distance can never be matched in the algorithm distances and\n",
    "# would make the lookup in the next cell fail.\n",
    "human_unique_round_d = df_human['round_d'].dropna().unique()\n",
    "human_unique_round_d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0bf690a5-d7d6-40d3-922a-3ef1ac43215a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect one (distance, score) pair per algorithm row and per distance that\n",
    "# the human evaluations cover.\n",
    "algorithm_round_ds = []\n",
    "algorithm_scores = []\n",
    "for row_distances, row_scores in zip(df_algorithm['true_distances'], df_algorithm['scores']):\n",
    "    # Round this row's own distances once per row. The position found below is\n",
    "    # then valid for this row's scores even if rows do not all share the\n",
    "    # distance grid of the first row.\n",
    "    round_distances = np.round(np.asarray(row_distances, dtype=float), 1)\n",
    "    for d in human_unique_round_d:\n",
    "        if pd.isna(d):\n",
    "            continue  # NaN never compares equal; groupby drops it on the human side too\n",
    "        matches = np.where(round_distances == d)[0]\n",
    "        if matches.size == 0:\n",
    "            raise ValueError(f'distance {d} not found in the rounded true_distances')\n",
    "        algorithm_round_ds.append(d)\n",
    "        algorithm_scores.append(row_scores[matches[0]])\n",
    "\n",
    "dfunpacked = pd.DataFrame({\n",
    "    'algorithm_round_ds': algorithm_round_ds,\n",
    "    'algorithm_scores': algorithm_scores,\n",
    "})\n",
    "\n",
    "# Mean / std / sem of the algorithm score per distance, with flat column names\n",
    "# (algorithm_scores_mean, algorithm_scores_std, algorithm_scores_sem).\n",
    "algo_results = (\n",
    "    dfunpacked\n",
    "    .groupby('algorithm_round_ds')\n",
    "    .agg(\n",
    "        algorithm_scores_mean=('algorithm_scores', 'mean'),\n",
    "        algorithm_scores_std=('algorithm_scores', 'std'),\n",
    "        algorithm_scores_sem=('algorithm_scores', 'sem'),\n",
    "    )\n",
    "    .reset_index()\n",
    ")\n",
    "algo_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad1239a2-941b-4942-8b9c-c172d779ea55",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pair the human and algorithm summaries on the rounded distance itself rather\n",
    "# than on row position, so a missing or reordered distance cannot silently\n",
    "# pair a human mean with the wrong algorithm mean.\n",
    "paired = human_results.merge(\n",
    "    algo_results, left_on='round_d', right_on='algorithm_round_ds', how='inner'\n",
    ")\n",
    "if len(paired) < 2:\n",
    "    raise ValueError('Need at least two shared distances to compare human and algorithm scores')\n",
    "\n",
    "# Plain arrays: positional access below does not depend on the frame's index.\n",
    "x = paired['distance_eval_mean'].to_numpy()\n",
    "y = paired['algorithm_scores_mean'].to_numpy()\n",
    "x_err = paired['distance_eval_std'].to_numpy()\n",
    "y_err = paired['algorithm_scores_std'].to_numpy()\n",
    "colors = paired['algorithm_round_ds'].to_numpy()\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(8, 8))\n",
    "scatter = ax.scatter(x, y, c=colors, s=50, cmap='viridis')\n",
    "\n",
    "# Error bars of one standard deviation, drawn point by point so that each bar\n",
    "# takes the colour of its own point.\n",
    "for xi, yi, xe, ye, ci in zip(x, y, x_err, y_err, colors):\n",
    "    ax.errorbar(xi, yi, xerr=xe, yerr=ye,\n",
    "                fmt='o', color=scatter.cmap(scatter.norm(ci)),\n",
    "                elinewidth=1, capsize=3, capthick=1, zorder=0)\n",
    "\n",
    "# The scatter already carries the colour map and its normalisation, so the\n",
    "# colour bar is built from it directly (no extra global named norm).\n",
    "cbar = fig.colorbar(scatter, ax=ax, aspect=40)\n",
    "cbar.set_label('Distance to Target')\n",
    "\n",
    "# Least-squares line of algorithm score against human score.\n",
    "slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)\n",
    "\n",
    "x_reg = np.array([0, 1])\n",
    "y_reg = slope * x_reg + intercept\n",
    "ax.plot(x_reg, y_reg, linestyle=':', label='Regression Line')\n",
    "\n",
    "ax.set_xlabel('Human Scores')\n",
    "ax.set_ylabel('Algorithm Scores')\n",
    "ax.set_title('Human vs Algorithm Scores')\n",
    "\n",
    "ax.set_xlim([-0.05, 1.05])\n",
    "# NOTE(review): hard-coded y window; points outside 1.077-1.11 are not shown.\n",
    "# Confirm it still fits the data whenever the inputs change.\n",
    "ax.set_ylim([1.077, 1.11])\n",
    "\n",
    "# Write the figure through the shared plotconfig helper.\n",
    "plotconfig.save_fig('human_vs_algo')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "79603b52-2907-468c-9ac2-5e94f3d2c3a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "directory = '../../../user_experiments/results'\n",
    "SHOWING_TIME = 500  # only rows with this showing_time are analysed (units presumably ms - TODO confirm)\n",
    "\n",
    "# os.listdir() returns names in arbitrary order; sorting keeps the concatenated\n",
    "# frame identical from run to run.\n",
    "csv_names = sorted(name for name in os.listdir(directory) if name.endswith('.csv'))\n",
    "if not csv_names:\n",
    "    raise ValueError(f'No .csv files found in {directory}')\n",
    "\n",
    "# A dedicated name, so the algorithm results held in df are not overwritten.\n",
    "df_user = pd.concat(\n",
    "    [pd.read_csv(os.path.join(directory, name)) for name in csv_names],\n",
    "    ignore_index=True,\n",
    ")\n",
    "\n",
    "# Despite its name, 'match' is 1 when the chosen item differs from the target,\n",
    "# so its mean is an error rate.\n",
    "df_user['match'] = (df_user['target'] != df_user['chosen']).astype(int)\n",
    "\n",
    "df_masked = df_user[df_user['showing_time'] == SHOWING_TIME].copy()\n",
    "\n",
    "# Mean / std / sem / count of 'match' per distance, with flat column names.\n",
    "compiled_result = (\n",
    "    df_masked\n",
    "    .groupby(['rounded_d', 'showing_time'])\n",
    "    .agg(\n",
    "        match_mean=('match', 'mean'),\n",
    "        match_std=('match', 'std'),\n",
    "        match_sem=('match', 'sem'),\n",
    "        match_count=('match', 'count'),\n",
    "    )\n",
    "    .reset_index()\n",
    ")\n",
    "compiled_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6682ec4-78a4-491c-8780-e0b2288ee23d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy.stats import norm\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Wilson Score Interval function\n",
    "def wilson_score_interval(successes, trials, confidence_level=0.95):\n",
    "    if trials == 0:\n",
    "        return (0.0, 0.0)  # No trials, no interval\n",
    "    z = norm.ppf((1 + confidence_level) / 2)\n",
    "    phat = successes / trials\n",
    "    denominator = 1 + (z**2 / trials)\n",
    "    centre_adjusted_probability = phat + (z**2 / (2 * trials))\n",
    "    adjusted_standard_deviation = np.sqrt((phat * (1 - phat) + (z**2 / (4 * trials))) / trials)\n",
    "    lower_bound = (centre_adjusted_probability - z * adjusted_standard_deviation) / denominator\n",
    "    upper_bound = (centre_adjusted_probability + z * adjusted_standard_deviation) / denominator\n",
    "    return (lower_bound, upper_bound)\n",
    "\n",
    "# Number of error trials ('match' == 1) and total trials per rounded distance.\n",
    "results = df_masked.groupby('rounded_d').agg(successes=('match', 'sum'), trials=('match', 'count')).reset_index()\n",
    "\n",
    "# Wilson bounds per group, computed in one pass; this avoids building a Series\n",
    "# per row and also works when results has no rows.\n",
    "interval_bounds = [\n",
    "    wilson_score_interval(n_success, n_trials)\n",
    "    for n_success, n_trials in zip(results['successes'], results['trials'])\n",
    "]\n",
    "results[['lower_bound', 'upper_bound']] = pd.DataFrame(\n",
    "    interval_bounds, columns=['lower_bound', 'upper_bound'], index=results.index\n",
    ")\n",
    "\n",
    "# Calculate the sample proportion\n",
    "results['sample_proportion'] = results['successes'] / results['trials']\n",
    "\n",
    "# Draw everything on one explicit Axes instead of the implicit pyplot state.\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "sns.lineplot(data=results, x='rounded_d', y='sample_proportion', label='Sample Proportion', marker='o', ax=ax)\n",
    "ax.fill_between(results['rounded_d'], results['lower_bound'], results['upper_bound'], alpha=0.3, label='Wilson Score Interval')\n",
    "ax.set_xlabel('Distance to Target Face')\n",
    "# NOTE(review): N=53 is hard-coded in the labels - confirm it matches the loaded data.\n",
    "ax.set_ylabel('Identification Error Rate (N=53)')\n",
    "ax.set_title('Identification Error Rate with Wilson Score Intervals (N=53)')\n",
    "\n",
    "# Reference distances. NOTE(review): 2.9 and 0.9 are hard-coded while the random\n",
    "# baseline comes from plotconfig - confirm they are still current.\n",
    "palette = sns.color_palette()\n",
    "ax.axvline(x=2.9, color=palette[1], alpha=0.9, linewidth=2.5, linestyle='--', label='Euclidean Top Rank (LinReg)')\n",
    "ax.axvline(x=plotconfig.IR_RANDOM_EUCLIDEAN, color=palette[4], alpha=0.9, linewidth=2.5, linestyle='--', label='Euclidean Top Rank (Random)')\n",
    "ax.axvline(x=0.9, color=palette[2], alpha=0.9, linewidth=2.5, linestyle='--', label='Euclidean Optimised (LinReg)')\n",
    "\n",
    "ax.legend()\n",
    "\n",
    "# Write the figure through the shared plotconfig helper.\n",
    "plotconfig.save_fig('match_proba')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c9b7126-cac8-44fe-9427-a77b7561e5e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the per-distance table: successes, trials, Wilson bounds and sample proportion.\n",
    "results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba6e5ada-180e-47f3-bfd1-3f2a5dce5702",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
