{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gc\n",
    "import os\n",
    "import pickle as pkl\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path components of the directory that holds the experiment results.\n",
    "root, results_location = '..', 'results'\n",
    "\n",
    "# Dataset hyperparameters.\n",
    "dataset = 'retweet'\n",
    "seq_len_x, seq_len_h = 10, 25\n",
    "\n",
    "# Model hyperparameters.\n",
    "lr = 0.001\n",
    "batch_size = 256\n",
    "training_step = 100_000\n",
    "dl_config, model_config = 'fenn_dl.yml', 'ehd_fenn.yml'\n",
    "\n",
    "# Pickle files holding the dppl distributions.\n",
    "dppl_d_filename = 'test_dppl_d_distribution.pkl'\n",
    "dppl_l_filename = 'test_dppl_l_distribution.pkl'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_dppl(filename):\n",
    "    \"\"\"Unpickle one dppl result file of the configured run.\n",
    "\n",
    "    The run directory is assembled from the module-level settings (root,\n",
    "    results_location, dataset, seq_len_x, seq_len_h, lr, batch_size,\n",
    "    training_step, dl_config, model_config).\n",
    "\n",
    "    Args:\n",
    "        filename: Name of the pickle file inside the run directory.\n",
    "\n",
    "    Returns:\n",
    "        Whatever object was pickled into the file.\n",
    "    \"\"\"\n",
    "    run_dir = os.path.join(\n",
    "        root, results_location, 'ehd', f'{dataset}_{seq_len_x}_{seq_len_h}',\n",
    "        f'results_ehd_perplexity_lr{lr}_bs{batch_size}_nts{training_step}_{dl_config}_{model_config}',\n",
    "    )\n",
    "    # NOTE: unpickling can execute arbitrary code; only load result files you trust.\n",
    "    # The context manager closes the handle even if unpickling raises.\n",
    "    with open(os.path.join(run_dir, filename), 'rb') as f:\n",
    "        return pkl.load(f)\n",
    "\n",
    "\n",
    "x_d = 'Number of Distilled Events'\n",
    "y_d = r'$dppl(\\mathcal{H}_{d}, \\mathcal{H}_f, \\mathbf{x}_o, p) / dppl_{max}(\\mathcal{H}_{d}, \\mathcal{H}_f, \\mathbf{x}_o, p)$'\n",
    "y_l = r'$dppl(\\mathcal{H}_{l}, \\mathcal{H}_f, \\mathbf{x}_o, p) / dppl_{max}(\\mathcal{H}_{l}, \\mathcal{H}_f, \\mathbf{x}_o, p)$'\n",
    "\n",
    "# Distilled events\n",
    "dppl_d = load_dppl(dppl_d_filename)\n",
    "dppl_d_data = dppl_d.flatten()\n",
    "\n",
    "# Left events\n",
    "dppl_l = load_dppl(dppl_l_filename)\n",
    "dppl_l_data = dppl_l.flatten()\n",
    "\n",
    "# Every run of shape[1] consecutive flattened values is labelled shape[1], ..., 1.\n",
    "# The repeat count is derived from the data instead of the former hard-coded 5000.\n",
    "# Assumes dppl_d is 2-D with one row per sequence - TODO confirm.\n",
    "num_events = dppl_d.shape[1]\n",
    "num_rows = len(dppl_d_data) // num_events\n",
    "dict_dppl = {\n",
    "    x_d: list(range(num_events, 0, -1)) * num_rows,\n",
    "    y_d: dppl_d_data,\n",
    "}\n",
    "df_dppl = pd.DataFrame.from_dict(dict_dppl)\n",
    "df_dppl[y_l] = dppl_l_data\n",
    "\n",
    "# Long format: one row per (event count, ratio kind) pair, ready for a hue-split plot.\n",
    "df_dppl_melted = df_dppl.melt(\n",
    "    id_vars = x_d, value_vars = [y_d, y_l],\n",
    "    var_name = 'dppl ratio', value_name = 'ratio value',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8820"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "plt.rcParams.update({'font.size': 22, 'figure.figsize': (12, 8)})\n",
    "\n",
    "# Draw on an explicit Axes instead of relying on pyplot's implicit current figure.\n",
    "fig, ax = plt.subplots()\n",
    "sns.lineplot(data = df_dppl_melted, x = x_d, y = 'ratio value', hue = 'dppl ratio', ax = ax)\n",
    "\n",
    "# Saved under a relative file name (the former single-argument os.path.join was a no-op).\n",
    "fig.savefig(f'trend_of_dppl_{dataset}_{seq_len_x}_{seq_len_h}.png', dpi = 1000)\n",
    "\n",
    "# Clear and close the figure, then collect garbage; the cell shows gc.collect()'s return value.\n",
    "fig.clf()\n",
    "plt.close(fig)\n",
    "gc.collect()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "tpp",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
