{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "All the experiments in this notebook are made for e_{f}=1 and e_{cf}=0. To change that, change the values of label_real_test and label_cf_test, and in the cells bellow, exchange y_data_1_test and y_data_0_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sbs\n",
    "import tensorflow as tf\n",
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "from synthetic_data_generators import create_dataset, create_dataset_counterfactuals\n",
    "import sys\n",
    "sys.path.append('../..')\n",
    "from Models import CSAE, CVAE, event_predictor, forecast_model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Synthetic data generation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "uniform_change = 0.7\n",
    "scale_param = 0.1\n",
    "key_step = 20\n",
    "seq_len = 30\n",
    "\n",
    "#train and eval data for the model\n",
    "train_labels, train_data = create_dataset(n = 2000, seq_len=seq_len, key_step=key_step, uniform_change=uniform_change, scale_param=scale_param) \n",
    "x_train, y_train = train_data[:,:key_step,:], train_data[:,key_step:,:]\n",
    "eval_labels, eval_data = create_dataset(n = 500, seq_len=seq_len, key_step=key_step, uniform_change=uniform_change, scale_param=scale_param) \n",
    "x_eval, y_eval = eval_data[:,:key_step,:], eval_data[:,key_step:,:]\n",
    "\n",
    "#test data \n",
    "data_0_test, data_1_test = create_dataset_counterfactuals(n = 500, seq_len=seq_len, key_step=key_step, uniform_change=uniform_change, scale_param=scale_param) \n",
    "x_data_test, y_data_0_test, _, y_data_1_test = data_0_test[:,:key_step,:], data_0_test[:,key_step:,:], data_1_test[:,:key_step,:], data_1_test[:,key_step:,:]\n",
    "label_real_test = np.full([len(x_data_test),1], 1.)  #modify 1. or 0. depending on the desired setting \n",
    "label_cf_test = np.full([len(x_data_test),1], 0.)  #modify 1. or 0. depending on the desired setting "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train predictor for effectiveness metric"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictor = event_predictor()\n",
    "optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)\n",
    "predictor.compile(optimizer=optimizer, loss=tf.keras.losses.BinaryCrossentropy(), metrics = \"accuracy\")\n",
    "predictor.fit(y_train, train_labels, epochs= 300, validation_data = (y_eval, eval_labels), batch_size=32, verbose=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CSAE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training CSAE\n",
    "latent_dim = 7\n",
    "Lambda = 0.19\n",
    "series_size = seq_len - key_step\n",
    "batch_size = 32\n",
    "model_sparse = CSAE(seq_len = series_size, latent_dim = latent_dim, feat_dim = 1, hidden_layer_sizes = [100,200], Lambda=Lambda)\n",
    "optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)\n",
    "model_sparse.compile(optimizer, loss=model_sparse.loss_, metrics=[model_sparse.reconstruction, model_sparse.regularization])\n",
    "history = model_sparse.fit(x =[train_labels, x_train, y_train], y=y_train, validation_data = [[eval_labels, x_eval, y_eval], y_eval], epochs=200, batch_size=batch_size, verbose=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MAE:  0.0629630140544818 MBE:  -0.011144542554720596\n"
     ]
    }
   ],
   "source": [
    "#cf mae and mbe\n",
    "pred = np.array(model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))\n",
    "mae_list = np.mean((np.abs(y_data_0_test- pred)), axis=1)\n",
    "mae = np.mean(mae_list)\n",
    "mbe = np.mean(y_data_0_test- pred)\n",
    "print(\"MAE: \", mae, \"MBE: \", mbe)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total diference:  0.9996108959157944 Step diference:  0.9850414102867481\n"
     ]
    }
   ],
   "source": [
    "# total steps and aletered steps metrics\n",
    "seq_length = 10\n",
    "dif_rel_tot_csae = []\n",
    "dif_rel_steps_csae = []\n",
    "for i in range(3):\n",
    "    ini_step= 2 +i\n",
    "    fin_step = 6 +i \n",
    "    for j in range(21):\n",
    "        val = -1 + 0.1*j\n",
    "        if val != 0:\n",
    "\n",
    "            # y_data_1_test are the actuals\n",
    "            actuals = y_data_1_test\n",
    "            alteration_ = np.concatenate([np.zeros([1,ini_step,1]), np.full(([1,fin_step-ini_step,1]), val), np.zeros([1,seq_length-fin_step,1])], axis=1)\n",
    "            alteration = np.repeat(alteration_, len(x_data_test), axis=0)\n",
    "            altered_actuals = actuals + alteration\n",
    "\n",
    "            cf = np.array(model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=actuals))\n",
    "            altered_cf = np.array(model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=altered_actuals))\n",
    "\n",
    "            instance_dif = np.sum((altered_cf - cf), axis=1)\n",
    "            difference_steps_alt = np.sum((altered_cf[:,ini_step:fin_step,:] - cf[:,ini_step:fin_step,:]),axis=1)\n",
    "            difference_steps_no_alt = np.sum((altered_cf[:,:ini_step,:] - cf[:,:ini_step,:]),axis=1) + np.sum((altered_cf[:,fin_step:,:] - cf[:,fin_step:,:]),axis=1)\n",
    "\n",
    "            dif_ideal = (fin_step - ini_step)*val\n",
    "            dif_rel_tot_ = np.mean(instance_dif)/dif_ideal\n",
    "            dif_rel_steps_ = np.mean(difference_steps_alt)/dif_ideal\n",
    "            dif_rel_tot_csae.append(dif_rel_tot_)\n",
    "            dif_rel_steps_csae.append(dif_rel_steps_)\n",
    "\n",
    "print(\"Total diference: \", np.mean(dif_rel_tot_csae), \"Step diference: \", np.mean(dif_rel_steps_csae))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Composition:  0.053748786 Reversibility:  0.06969808 Effectiveness:  1.0\n"
     ]
    }
   ],
   "source": [
    "# composition, reversibility and effectiveness metrics\n",
    "reconstruction = model_sparse.composition(label_real_test, x_data_test, y_data_1_test, 1)\n",
    "rec_error = np.mean((np.abs(y_data_1_test- reconstruction)), axis=1)\n",
    "reversibility = model_sparse.reversibility(label_real_test, label_cf_test, x_data_test, y_data_1_test, 1)\n",
    "rev_error = np.mean((np.abs(y_data_1_test- reversibility)), axis=1)\n",
    "cf_estimation = np.array(model_sparse.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))\n",
    "pred_labels = predictor(cf_estimation)\n",
    "pred_labels_error = np.mean((np.abs(label_cf_test - pred_labels)), axis=1)\n",
    "pred_labels_binary = list(map(lambda x: 1 if(x>=0.5) else 0 , pred_labels))\n",
    "acc = accuracy_score(label_cf_test, pred_labels_binary)\n",
    "\n",
    "mean_rec_error = np.mean(rec_error)\n",
    "mean_rev_error = np.mean(rev_error)\n",
    "mean_pred_labels_error = np.mean(pred_labels_error)\n",
    "mean_eff_error = np.mean(acc)\n",
    "\n",
    "print(\"Composition: \", mean_rec_error, \"Reversibility: \", mean_rev_error, \"Effectiveness: \", mean_eff_error)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CVAE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "latent_dim = 3\n",
    "recon_weight = 90\n",
    "series_size = seq_len - key_step\n",
    "batch_size = 32\n",
    "vae_model = CVAE(seq_len = series_size, latent_dim = latent_dim, feat_dim = 1, hidden_layer_sizes = [100,200], recon_weight=recon_weight)\n",
    "optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)\n",
    "vae_model.compile(optimizer, loss=vae_model.loss_, metrics=[vae_model.reconstruction, vae_model.kl])\n",
    "history = vae_model.fit(x =[train_labels, x_train, y_train], y=y_train, validation_data = [[eval_labels, x_eval, y_eval], y_eval], epochs=250, batch_size=batch_size, verbose=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MAE:  0.13047234035219277 MBE:  0.05347146836099079\n"
     ]
    }
   ],
   "source": [
    "#cf mae and mbe\n",
    "pred = np.array(vae_model.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))\n",
    "mae_list = np.mean((np.abs(y_data_0_test- pred)), axis=1)\n",
    "mae = np.mean(mae_list)\n",
    "mbe = np.mean(y_data_0_test- pred)\n",
    "print(\"MAE: \", mae, \"MBE: \", mbe)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total diference:  0.4586754665958877 Step diference:  0.3496101022130652\n"
     ]
    }
   ],
   "source": [
    "# total steps and aletered steps metrics\n",
    "seq_length = 10\n",
    "dif_rel_tot_cvae = []\n",
    "dif_rel_steps_cvae = []\n",
    "for i in range(3):\n",
    "    ini_step= 2 +i\n",
    "    fin_step = 6 +i \n",
    "    for j in range(21):\n",
    "        val = -1 + 0.1*j\n",
    "        if val != 0:\n",
    "\n",
    "            # y_data_1_test are actuals\n",
    "            actuals = y_data_1_test\n",
    "            alteration_ = np.concatenate([np.zeros([1,ini_step,1]), np.full(([1,fin_step-ini_step,1]), val), np.zeros([1,seq_length-fin_step,1])], axis=1)\n",
    "            alteration = np.repeat(alteration_, len(x_data_test), axis=0)\n",
    "            altered_actuals = actuals + alteration\n",
    "\n",
    "            cf = np.array(vae_model.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=actuals))\n",
    "            altered_cf = np.array(vae_model.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=altered_actuals))\n",
    "\n",
    "            instance_dif = np.sum((altered_cf - cf), axis=1)\n",
    "            difference_steps_alt = np.sum((altered_cf[:,ini_step:fin_step,:] - cf[:,ini_step:fin_step,:]),axis=1)\n",
    "            difference_steps_no_alt = np.sum((altered_cf[:,:ini_step,:] - cf[:,:ini_step,:]),axis=1) + np.sum((altered_cf[:,fin_step:,:] - cf[:,fin_step:,:]),axis=1)\n",
    "\n",
    "            dif_ideal = (fin_step - ini_step)*val\n",
    "            dif_rel_tot_ = np.mean(instance_dif)/dif_ideal\n",
    "            dif_rel_steps_ = np.mean(difference_steps_alt)/dif_ideal\n",
    "            dif_rel_tot_cvae.append(dif_rel_tot_)\n",
    "            dif_rel_steps_cvae.append(dif_rel_steps_)\n",
    "\n",
    "print(\"Total diference: \", np.mean(dif_rel_tot_cvae), \"Step diference: \", np.mean(dif_rel_steps_cvae))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Composition:  0.11366185 Reversibility:  0.12815796 Effectiveness:  1.0\n"
     ]
    }
   ],
   "source": [
    "# composition, reversibility and effectiveness metrics\n",
    "reconstruction = vae_model.composition(label_real_test, x_data_test, y_data_1_test, 1)\n",
    "rec_error = np.mean((np.abs(y_data_1_test- reconstruction)), axis=1)\n",
    "reversibility = vae_model.reversibility(label_real_test, label_cf_test, x_data_test, y_data_1_test, 1)\n",
    "rev_error = np.mean((np.abs(y_data_1_test- reversibility)), axis=1)\n",
    "cf_estimation = np.array(vae_model.cf_generation(label_real=label_real_test, label_cf=label_cf_test, x=x_data_test, y=y_data_1_test))\n",
    "pred_labels = predictor(cf_estimation)\n",
    "pred_labels_error = np.mean((np.abs(label_cf_test - pred_labels)), axis=1)\n",
    "pred_labels_binary = list(map(lambda x: 1 if(x>=0.5) else 0 , pred_labels))\n",
    "acc = accuracy_score(label_cf_test, pred_labels_binary)\n",
    "\n",
    "mean_rec_error = np.mean(rec_error)\n",
    "mean_rev_error = np.mean(rev_error)\n",
    "mean_pred_labels_error = np.mean(pred_labels_error)\n",
    "mean_eff_error = np.mean(acc)\n",
    "\n",
    "print(\"Composition: \", mean_rec_error, \"Reversibility: \", mean_rev_error, \"Effectiveness: \", mean_eff_error)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Forecast"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<keras.src.callbacks.History at 0x7fd95ffb3a00>"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_forecast = forecast_model(pred_steps=10)\n",
    "optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)\n",
    "model_forecast.compile(optimizer=optimizer, loss=\"mse\", metrics = \"mae\")\n",
    "model_forecast.fit([train_labels, x_train], y_train, epochs= 500, validation_data = ([eval_labels, x_eval], y_eval), batch_size=32, verbose=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MAE:  0.20356284 MBE:  0.015091702\n"
     ]
    }
   ],
   "source": [
    "#cf mae and mbe\n",
    "pred = model_forecast([label_cf_test, x_data_test])\n",
    "mae = np.mean(np.abs(y_data_0_test.reshape(-1,10) - pred))\n",
    "mbe = np.mean(y_data_0_test.reshape(-1,10) - pred)\n",
    "\n",
    "print(\"MAE: \", mae, \"MBE: \", mbe)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.11"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
