{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "47e15eab-344e-4708-83eb-87b2e6aa69e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "# Base locations, all expressed relative to the current working directory\n",
    "_ROOT_PATH = os.path.join('..', '..', '..')\n",
    "_OPTIM_DIR = os.path.join(_ROOT_PATH, 'experiments', 'optim')\n",
    "_FINAL_DFS_DIR = os.path.join('..', '..', 'final_dfs')\n",
    "\n",
    "# Extend the import search path before importing project modules\n",
    "# (presumably this is where plotconfig and the *_tools modules live)\n",
    "sys.path.append('..')\n",
    "sys.path.append(os.path.join(_ROOT_PATH, 'tools'))\n",
    "\n",
    "import plotconfig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "92f4574d-7e8b-49bc-b8e5-5d67414acb69",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "# Read the optim.parquet table from the final_dfs directory\n",
    "dbf = os.path.join(_FINAL_DFS_DIR, 'optim.parquet')\n",
    "df = pd.read_parquet(path=dbf, engine='pyarrow')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd96ae8d-f2db-4b12-83eb-a41a01cfc8ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Thin the data: keep only rows whose 'number' is a multiple of `interval`\n",
    "# (between 0 and the largest 'number' present)\n",
    "interval = 100\n",
    "numbers_to_keep = list(range(0, df['number'].max() + 1, interval))\n",
    "subsampled_df = df.loc[df['number'].isin(numbers_to_keep)]\n",
    "\n",
    "# One line per method: 'value' against 'number'\n",
    "sns.lineplot(x='number', y='value', hue='method_name', data=subsampled_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "724a0075-0331-4c84-b36f-c39ba906ebfb",
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "\n",
    "# Keep every `interval`-th value of 'number', restricted to number <= 2500\n",
    "interval = 10\n",
    "numbers_to_keep = list(range(0, df['number'].max() + 1, interval))\n",
    "subsampled_df = df[df['number'].isin(numbers_to_keep)]\n",
    "\n",
    "mask = ((subsampled_df['number'] <= 2500))\n",
    "subsampled_df = subsampled_df[mask]\n",
    "\n",
    "# One line per method; the band shows the standard deviation\n",
    "ax = sns.lineplot(data=subsampled_df, x='number', y='euclidean_distance', hue='method_name', errorbar='sd')\n",
    "\n",
    "# Reference level: mean distance of the LinearRegression rows with number > 2400.\n",
    "# `tmp` is also read by a later cell, so the name is kept.\n",
    "mask = ((subsampled_df['method_name'] == 'LinearRegression') &\n",
    "       (subsampled_df['number'] > 2400))\n",
    "tmp = subsampled_df[mask]\n",
    "ax.axhline(y=np.mean(tmp['euclidean_distance']), color='red', alpha=0.5, linestyle='--', label='Euclidean Top Rank (LinReg)')\n",
    "\n",
    "# seaborn created the legend inside lineplot(), i.e. before the reference line existed,\n",
    "# so its label would never be shown. Rebuild the legend to include it.\n",
    "ax.legend(title='method_name')\n",
    "\n",
    "ax.set_xlabel('Optimisation iterations')\n",
    "ax.set_ylabel('Distance to ground truth')\n",
    "ax.set_title('Optimisation results')\n",
    "\n",
    "# Small margin on both sides of the 0..2500 range\n",
    "ax.set_xlim([-20, 2520])\n",
    "\n",
    "# Save as high-definition\n",
    "plotconfig.save_fig(\"optim\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "391410f5-c018-45e2-95ec-949bcdbc5d7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows whose 'number' equals 999\n",
    "last = df[df['number'] == 999]\n",
    "last\n",
    "\n",
    "# The ten parameter columns params_x0 ... params_x9, extracted as a NumPy array\n",
    "params_columns = [f'params_x{i}' for i in range(10)]\n",
    "params_array = last[params_columns].to_numpy()\n",
    "\n",
    "params_array\n",
    "\n",
    "# NOTE(review): pca_face is only assigned further down (the loop that unpickles it),\n",
    "# so that cell has to be run before this one.\n",
    "pca_face.inverse_transform([-15] * 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08499024-b2b3-4ee1-b5b8-3609095fc78e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Euclidean distance between target_face and pca_face.inverse_transform of ten -15 values.\n",
    "# NOTE(review): distance, target_face and pca_face are only bound by cells further down.\n",
    "distance.euclidean(target_face, pca_face.inverse_transform([-15]*10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0f6657d-a560-429f-afa4-9f8fa8b04349",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mean distance over the rows selected into `tmp` by the plotting cell above\n",
    "tmp['euclidean_distance'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aadb7ee0-8ee0-440b-8b5d-0e7a981ce01c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import file_tools\n",
    "import saving_tools\n",
    "import face_tools\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "# NOTE(review): `d` is only assigned by the JSON-loading loops in the cells below,\n",
    "# so one of them must have been run first.\n",
    "# The result of this first call is discarded: only a cell's last expression is displayed.\n",
    "face_tools.img_from_latent(np.array(d['best_face']))\n",
    "\n",
    "# The loaded object is indexed by key, so it is presumably an .npz archive; such an\n",
    "# archive keeps its file open until closed, hence the context manager.\n",
    "with np.load(d['target_filename']) as target_data:\n",
    "    target_face = target_data['target_face']\n",
    "face_tools.img_from_latent(target_face)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c965b24-53ac-4a42-9970-895318189815",
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.spatial import distance\n",
    "import pickle\n",
    "\n",
    "# Column selections do not change between runs, so define them once\n",
    "params_columns = ['params_x0', 'params_x1', 'params_x2', 'params_x3', 'params_x4',\n",
    "                  'params_x5', 'params_x6', 'params_x7', 'params_x8', 'params_x9']\n",
    "columns_to_keep = ['method_name', 'number', 'value', 'euclidean_distance']\n",
    "\n",
    "# Collect one frame per run and concatenate once at the end: repeatedly concatenating\n",
    "# onto an initially empty DataFrame is quadratic and deprecated in recent pandas.\n",
    "run_frames = []\n",
    "\n",
    "fs = file_tools.list_files(_OPTIM_DIR, \"*.json\")\n",
    "for f in fs:\n",
    "    d = saving_tools.load_dict_from_json(f)\n",
    "    df_filename = os.path.join(_ROOT_PATH, d['df_filename'])\n",
    "\n",
    "    # NOTE(review): this rebinds the notebook-level `df` that was loaded from optim.parquet\n",
    "    df = pd.read_parquet(df_filename, engine='pyarrow')\n",
    "    df['method_name'] = d['method_name']\n",
    "\n",
    "    # Close the archive as soon as the array has been read\n",
    "    with np.load(d['target_filename']) as target_data:\n",
    "        target_face = target_data['target_face']\n",
    "\n",
    "    params_array = df[params_columns].to_numpy()\n",
    "\n",
    "    pca_face_filename = os.path.join(_ROOT_PATH, d['pca_face_filename'])\n",
    "    # pickle.load can execute arbitrary code: only use it on files you trust\n",
    "    with open(pca_face_filename, 'rb') as file:\n",
    "        pca_face = pickle.load(file)\n",
    "    tested_faces = pca_face.inverse_transform(params_array)\n",
    "\n",
    "    # Distance from the target face to each face rebuilt from the tested parameters\n",
    "    euclidean_distances = np.array([distance.euclidean(target_face, row) for row in tested_faces])\n",
    "    df['euclidean_distance'] = euclidean_distances\n",
    "\n",
    "    new_df = df[columns_to_keep]\n",
    "    run_frames.append(new_df)\n",
    "\n",
    "    # NOTE(review): only the first JSON file is processed because of this break;\n",
    "    # remove it to process every run.\n",
    "    break\n",
    "\n",
    "# An empty file list yields an empty DataFrame (pd.concat rejects an empty list)\n",
    "result_df = pd.concat(run_frames, ignore_index=True) if run_frames else pd.DataFrame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71e81202-1b4d-4ddf-91ae-f7de73ad8e4e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import numpy as np\n",
    "from scipy import stats\n",
    "from scipy.spatial import distance\n",
    "\n",
    "import dataset_tools\n",
    "\n",
    "# Reference distances: each observed face against the target face paired with it\n",
    "true_labels = np.array([\n",
    "    distance.euclidean(row1, row2)\n",
    "    for row1, row2 in zip(dataset_tools.observed_faces, dataset_tools.target_faces)\n",
    "])\n",
    "\n",
    "# _OPTIM_DIR is already defined in the first cell, so it is not redefined here.\n",
    "\n",
    "# One record per JSON file; the DataFrame is built once after the loop instead of\n",
    "# concatenating onto an initially empty DataFrame on every iteration.\n",
    "records = []\n",
    "fs = file_tools.list_files(_OPTIM_DIR, \"*.json\")\n",
    "for f in fs:\n",
    "    d = saving_tools.load_dict_from_json(f)\n",
    "\n",
    "    best_face = np.array(d['best_face'])\n",
    "    # Close the archive as soon as the array has been read\n",
    "    with np.load(d['train_filename']) as train_data:\n",
    "        face_observed = train_data['faces_observed']\n",
    "    # Distance from the run's best face to every observed face\n",
    "    pred_labels = np.array([distance.euclidean(best_face, row) for row in face_observed])\n",
    "\n",
    "    # Root-mean-square error between reference and best-face distances\n",
    "    mse = np.mean((true_labels - pred_labels) ** 2)\n",
    "    rmse = np.sqrt(mse)\n",
    "\n",
    "    records.append({\n",
    "        'method_name': d['method_name'],\n",
    "        'rmse': rmse,\n",
    "        'true_labels': true_labels,\n",
    "        'pred_labels': pred_labels,\n",
    "    })\n",
    "\n",
    "# Each array is stored whole in a single cell of its row\n",
    "result_df = pd.DataFrame(records)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8502aac7-8d9c-4a98-acaa-28aeb7529506",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional persistence, currently disabled: uncomment the first line to write result_df\n",
    "# to a parquet file, or the second to reload it from that file instead of recomputing.\n",
    "# result_df.to_parquet('rmse_y_true_vs_y_reconstructed.parquet', engine='pyarrow')\n",
    "# result_df = pd.read_parquet('rmse_y_true_vs_y_reconstructed.parquet', engine='pyarrow')\n",
    "result_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1fb35063-0ddf-479d-ab6a-af426672f6a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summarise result_df per method\n",
    "grouped_df = result_df.groupby(['method_name'])\n",
    "\n",
    "# Named aggregation produces the flat 'rmse_mean' / 'rmse_std' columns directly\n",
    "compiled_result = grouped_df.agg(\n",
    "    rmse_mean=('rmse', 'mean'),\n",
    "    rmse_std=('rmse', 'std'),\n",
    ")\n",
    "\n",
    "# Turn the method_name index back into a regular column\n",
    "compiled_result = compiled_result.reset_index()\n",
    "compiled_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b246a51b-b960-4cb1-8485-be6186abb690",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): ad-hoc ratio of two hard-coded numbers; where they come from is not\n",
    "# recorded anywhere in this notebook.\n",
    "0.18/46.6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "682486dc-764c-44e3-965b-33e59d498df4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# `d` is the loop variable left in scope by the JSON-loading loop above, so this is the\n",
    "# method name of the last file that loop processed.\n",
    "d['method_name']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "13d0c203-ef37-4329-bb56-d48212eed9cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# pred_labels and d are whatever the last iteration of the RMSE loop left in scope,\n",
    "# so this plot covers only the last processed method; the title makes that explicit.\n",
    "plt.scatter(true_labels, pred_labels)\n",
    "plt.xlabel('Distance observed face - target face')\n",
    "plt.ylabel('Distance observed face - best face')\n",
    "plt.title(d['method_name']);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c86e45b5-5135-4f53-a08a-6b703bb399dc",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
