{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np, pandas as pd\n",
    "import os, sys\n",
    "import joblib\n",
    "import math\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "scores_dir = './scores/'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Combine all score files into single file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "models = ['timeVAE', 'rcgan', 'T_forcing', 'timegan', 'orig']   # vae_conv_I, rcgan, timegan, T_forcing\n",
    "\n",
    "training_sizes = [2, 5, 10, 20, 100]\n",
    "datasets = ['air', 'energy', 'sine', 'stocksv', ]\n",
    "\n",
    "all_scores = []\n",
    "for model in models:\n",
    "    for dataset in datasets:\n",
    "        for training_size in training_sizes:\n",
    "            # pred scores\n",
    "            fname = f\"./{scores_dir}/{model}/{model}_pred_scores_{dataset}_{training_size}.csv\"\n",
    "            if os.path.exists(fname):\n",
    "                print(fname)\n",
    "                data = pd.read_csv(fname)\n",
    "                all_scores.append(data)\n",
    "                \n",
    "            # disc scores\n",
    "            fname = f\"./{scores_dir}/{model}/{model}_disc_scores_{dataset}_{training_size}.csv\"\n",
    "            if os.path.exists(fname):\n",
    "                print(fname)\n",
    "                data = pd.read_csv(fname)\n",
    "                all_scores.append(data)\n",
    "            else: \n",
    "                print(\"doesnt exist\", fname)\n",
    "                \n",
    "all_scores = pd.concat(all_scores, ignore_index=True)\n",
    "all_scores = all_scores.round(3)\n",
    "all_scores.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_scores.to_csv(f\"./{scores_dir}/{model}/{model}_disc_and_pred_scores_ALL.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_scores.tail(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create new df with required columns\n",
    "new_df = all_scores.copy()\n",
    "\n",
    "# convert required scores to 3 decimals and format 0as string\n",
    "cols = ['mean', 'conf_int']\n",
    "for col in cols: \n",
    "#     new_df[col+'new'] = new_df[col].apply(lambda x: '{:0.3f}'.format(x)[1:] if math.isnan(x) else str(x) )\n",
    "    new_df[col] = new_df[col].apply(lambda x: f'{x:.3f}')\n",
    "    \n",
    "new_df.tail(25)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create columns which show mean +/- std dev. \n",
    "new_df['score'] = new_df.apply( lambda row:row['mean'] + ' +/- ' + row['conf_int'], axis = 1 )\n",
    "new_df.drop(columns=['mean', 'conf_int'], inplace=True) \n",
    "new_df.tail(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#pivot out the dataset into columns \n",
    "\n",
    "non_pivoted_columns = ['metric', 'model', 'train_perc']\n",
    "pivoting_column = ['dataset']\n",
    "metrics = ['score']\n",
    "\n",
    "final_df = []\n",
    "for metric in metrics: \n",
    "    pivoted_columns = metric\n",
    "    cols = non_pivoted_columns + pivoting_column + [pivoted_columns]\n",
    "    temp_df = new_df[cols]\n",
    "    \n",
    "\n",
    "    pivoted = temp_df.pivot_table(index = non_pivoted_columns, \n",
    "                                          aggfunc=lambda x: ' '.join(x),\n",
    "                                          columns=pivoting_column, \n",
    "                                          values=pivoted_columns).reset_index()\n",
    "    \n",
    "    final_df.append(pivoted)\n",
    "\n",
    "\n",
    "final_df = pd.concat(final_df, axis=0, ignore_index=True)\n",
    "final_df.tail(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# save in csv and excel formats\n",
    "final_df.to_csv(f\"./{scores_dir}/ALL_disc_and_pred_scores_PIVOTED.csv\", index=False)\n",
    "final_df.to_excel(f\"./{scores_dir}/ALL_disc_and_pred_scores_PIVOTED.xlsx\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MinMaxScaler():\n",
    "    \"\"\"Min Max normalizer.\n",
    "    Args:\n",
    "    - data: original data\n",
    "\n",
    "    Returns:\n",
    "    - norm_data: normalized data\n",
    "    \"\"\"\n",
    "    def fit_transform(self, data): \n",
    "        self.fit(data)\n",
    "        scaled_data = self.transform(data)\n",
    "        return scaled_data\n",
    "\n",
    "\n",
    "    def fit(self, data):    \n",
    "        self.mini = np.min(data, 0)\n",
    "        self.range = np.max(data, 0) - self.mini\n",
    "        return self\n",
    "        \n",
    "\n",
    "    def transform(self, data):\n",
    "        numerator = data - self.mini\n",
    "        scaled_data = numerator / (self.range + 1e-7)\n",
    "        return scaled_data\n",
    "\n",
    "    \n",
    "    def inverse_transform(self, data):\n",
    "        data *= self.range\n",
    "        data += self.mini\n",
    "        return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gen_data_dir = \"../../data/generated_data/\"\n",
    "\n",
    "# our model name\n",
    "model = 'vae_conv_I'         # vae_conv_I, vae_IN, rcgan, T_forcing\n",
    "\n",
    "dataset = 'air'  # 'stocks', 'stocks2', 'air', 'sine', 'energy'\n",
    "perc = 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sample_file_name = gen_data_dir + f'{model}/{model}_gen_samples_{dataset}_perc_{training_size}.npz'\n",
    "loaded = np.load(sample_file_name)\n",
    "gen_data = loaded['data'] \n",
    "scaler = MinMaxScaler( )  \n",
    "gen_data = scaler.fit_transform(gen_data)\n",
    "\n",
    "print(gen_data.shape)\n",
    "\n",
    "# print(gen_data.mean(axis=0).mean(axis=0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from matplotlib.pyplot import figure\n",
    "\n",
    "figure(figsize=(6, 2), dpi=80)\n",
    "x = plt.plot(gen_data[0])\n",
    "plt.show()\n",
    "\n",
    "figure(figsize=(6, 2), dpi=80)\n",
    "x = plt.plot(gen_data[10])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
