{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "454d3cec",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from tqdm.notebook import tqdm\n",
    "import pickle\n",
    "import glob\n",
    "\n",
    "from scipy import stats as sc_stats\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.metrics import mean_absolute_percentage_error\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "71708756",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2023-05-13 17:33:01.698909: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c232c377",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "from os.path import dirname\n",
    "sys.path.append(dirname(\"../../\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ae3f2721",
   "metadata": {},
   "outputs": [],
   "source": [
    "from src.edl import dense_layers,dense_loss\n",
    "from src.weibull_edl import loss_and_layers\n",
    "from src.exp_utils import synthetic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e8ea111a",
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cdfd4fab",
   "metadata": {},
   "source": [
    "### Experiment variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "55a0aa58",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Experiment name; also the sub-folder of ../../exp_results that receives the outputs.\n",
    "EXP_NAME = \"exp2\"\n",
    "# Number of eps values to try.\n",
    "N_EPS = 5\n",
    "# Number of regularisation values to try.\n",
    "N_REGUL = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "31c7d056",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "../../exp_results/exp2 is empty\n"
     ]
    }
   ],
   "source": [
    "# Output directory for this experiment's figures, CSV and pickle.\n",
    "base_dir = f\"../../exp_results/{EXP_NAME}\"\n",
    "\n",
    "# Create the directory on first use so os.listdir below cannot raise FileNotFoundError.\n",
    "os.makedirs(base_dir, exist_ok=True)\n",
    "\n",
    "if len(os.listdir(base_dir)) == 0:\n",
    "    print(f\"{base_dir} is empty\")\n",
    "else:\n",
    "    print(f\"{base_dir} is not empty, Removing files\")\n",
    "    # Explicit loop instead of a list comprehension used only for side effects.\n",
    "    # Sub-directories are skipped because os.remove only works on files.\n",
    "    for stale_path in glob.glob(base_dir + \"/*\"):\n",
    "        if os.path.isfile(stale_path):\n",
    "            os.remove(stale_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1b36ab3f",
   "metadata": {},
   "source": [
    "### Helper functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "6b216129",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_results(ax, mu, var, model_type, n):\n",
    "    \"\"\"Draw the test curve, training points, mean prediction and two shaded bands on ``ax``.\n",
    "\n",
    "    Reads ``x_train``, ``y_train``, ``x_test`` and ``y_test`` from the notebook's global\n",
    "    namespace, so they must already be defined when this function is called.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    ax : matplotlib Axes to draw on.\n",
    "    mu : predicted mean, plotted against ``x_test.reshape(n)``.\n",
    "    var : spread estimate. Its square root is used when ``model_type`` is \"proposed\";\n",
    "        for any other ``model_type`` the value is used as given.\n",
    "    model_type : \"proposed\" selects band multipliers 0.5 and 1, anything else selects\n",
    "        2 and 4. Also shown in the subplot title.\n",
    "    n : length that ``x_test`` and the band edges are reshaped to.\n",
    "    \"\"\"\n",
    "    ax.plot(x_test, y_test, label=\"True Test\", alpha=1.0, color=\"black\")\n",
    "    ax.scatter(x_train, y_train, label=\"Train\")\n",
    "    flat_x = x_test.reshape(n)\n",
    "    ax.plot(flat_x, mu, zorder=3, label=\"Mean Pred\", color=\"red\", alpha=0.5)\n",
    "\n",
    "    # Band multipliers and the spread they scale depend on the model type.\n",
    "    if model_type == \"proposed\":\n",
    "        inner, outer, spread = 0.5, 1, np.sqrt(var)\n",
    "    else:\n",
    "        inner, outer, spread = 2, 4, var\n",
    "\n",
    "    # Narrow band first, then the wide one, so the draw order stays the same.\n",
    "    for mult, shade, opacity in ((inner, \"grey\", 0.7), (outer, \"pink\", 0.2)):\n",
    "        ax.fill_between(\n",
    "            x=flat_x,\n",
    "            y1=(mu - mult * spread).reshape(n),\n",
    "            y2=(mu + mult * spread).reshape(n),\n",
    "            label=f\"{mult} std PI\",\n",
    "            color=shade,\n",
    "            alpha=opacity,\n",
    "        )\n",
    "\n",
    "    ax.legend()\n",
    "    ax.set_title(f\"{model_type} Model\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "49c82ab3",
   "metadata": {},
   "source": [
    "### Experiment for synthetic data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "acadde87",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "For eps=0.2, fitted k =0.9826445060749549\n",
      "Fitting for c=1e-08\n",
      "fitting benchmark\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2023-05-13 17:34:21.583055: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=7.742636826811277e-08\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=5.994842503189409e-07\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=4.641588833612773e-06\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=3.5938136638046256e-05\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=0.0002782559402207126\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=0.002154434690031882\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=0.016681005372000558\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=0.12915496650148828\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=1.0\n",
      "fitting benchmark\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/ashish1610dhiman/data_projects/iuq_project/notebooks/neurips_experiments/../../src/exp_utils/synthetic.py:49: RuntimeWarning: divide by zero encountered in divide\n",
      "  var = np.sqrt(beta / (v * (alpha - 1)))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "For eps=0.525, fitted k =1.0981879623315494\n",
      "Fitting for c=1e-08\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=7.742636826811277e-08\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=5.994842503189409e-07\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=4.641588833612773e-06\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=3.5938136638046256e-05\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=0.0002782559402207126\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=0.002154434690031882\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=0.016681005372000558\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=0.12915496650148828\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=1.0\n",
      "fitting benchmark\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/ashish1610dhiman/data_projects/iuq_project/notebooks/neurips_experiments/../../src/exp_utils/synthetic.py:49: RuntimeWarning: divide by zero encountered in divide\n",
      "  var = np.sqrt(beta / (v * (alpha - 1)))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "For eps=0.8500000000000001, fitted k =1.1950163882441633\n",
      "Fitting for c=1e-08\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=7.742636826811277e-08\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=5.994842503189409e-07\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=4.641588833612773e-06\n",
      "fitting benchmark\n",
      "fitting proposed\n",
      "107/107 [==============================] - 0s 1ms/step\n",
      "\n",
      "Fitting for c=3.5938136638046256e-05\n",
      "fitting benchmark\n",
      "fitting proposed\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "KeyboardInterrupt\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Sweep over noise levels (eps) and regularisation values (c). For each pair, fit the\n",
    "# benchmark and the proposed model, record (mse, nll) in results_dict, keep the fitted\n",
    "# models in model_dict, and save a side-by-side figure under base_dir.\n",
    "results_dict={}\n",
    "model_dict={}\n",
    "data_dict={}\n",
    "\n",
    "# Sample count requested for both the train and test sets; plot_results reshapes to it.\n",
    "n_points = 3400\n",
    "\n",
    "eps_list = np.linspace(0.2,1.5,N_EPS)\n",
    "for eps in eps_list:\n",
    "    #a.gen synthetic data\n",
    "    x_train, y_train = synthetic.gen_data_weibulll(-4, 4, n_points, eps)\n",
    "    x_test, y_test = synthetic.gen_data_weibulll(-5, 5, n_points, eps, train=False)\n",
    "    data_dict[eps] = (x_train, y_train, x_test, y_test)\n",
    "    \n",
    "    #b.get k from train data (first value returned by weibull_min.fit, location fixed at 0)\n",
    "    rv = sc_stats.weibull_min.fit(y_train, floc=0.0)\n",
    "    k=float(rv[0])\n",
    "    print (f\"For eps={eps}, fitted k ={k}\")\n",
    "    \n",
    "    #c.plot data and save fig\n",
    "    fig,ax = plt.subplots(1,1)\n",
    "    ax.scatter(x_train,y_train,label=\"Train\")\n",
    "    ax.plot(x_test,y_test,label=\"Test\",alpha = 0.5, color=\"orange\")\n",
    "    ax.legend()\n",
    "    # Raw f-string: \"\\s\" is an invalid escape sequence in a normal string literal.\n",
    "    ax.set_title(rf\"$y \\sim x^2 + {eps:.3f}*weibull(shape=1.2)$\")\n",
    "    fig.savefig(base_dir+f\"/data_gen_eps={eps}.png\")\n",
    "    plt.close(fig)\n",
    "    \n",
    "    #d.initialise and run benchmark model\n",
    "    results_benchmark_eps = {}\n",
    "    for c_i in np.logspace(-8,0,N_REGUL):\n",
    "        try:\n",
    "            print (f\"Fitting for c={c_i}\")\n",
    "            print(\"fitting benchmark\")\n",
    "            #run benchmark model\n",
    "            mu_i, var_i, y_pred_i, benchmark_model_i = synthetic.results_benchmark_model(c_i,x_train,y_train,x_test)\n",
    "            gamma, v, alpha, beta = y_pred_i[:,0], y_pred_i[:,1], y_pred_i[:,2], y_pred_i[:,3]\n",
    "            mse_benchmark_i = mean_squared_error(y_test,mu_i)\n",
    "            nll_benchmark_i = dense_loss.NIG_NLL(y_test,gamma,v,alpha,beta).numpy()\n",
    "            results_dict[(eps,c_i,\"benchmark\")] = (mse_benchmark_i,nll_benchmark_i)\n",
    "            model_dict[(eps,c_i,\"benchmark\")] = benchmark_model_i\n",
    "            \n",
    "            print(\"fitting proposed\")\n",
    "            #run proposed model\n",
    "            # NOTE(review): the meaning of the trailing 0 argument is not visible here - confirm in synthetic.py\n",
    "            mu_prop_i, var_prop_i, y_pred_prop_i, proposed_model_i = synthetic.results_weibull_model(\n",
    "                c_i, x_train, y_train, x_test, k, 0\n",
    "            )\n",
    "            # alpha/beta are rebound here to the proposed model's two output columns\n",
    "            alpha,beta = y_pred_prop_i[:,0],y_pred_prop_i[:,1]\n",
    "            mse_proposed_i = mean_squared_error(y_test,mu_prop_i)\n",
    "            nll_proposed_i = loss_and_layers.weibull_NLL(y_test,alpha,beta, k, reduce=True).numpy()\n",
    "            results_dict[(eps,c_i,\"proposed\")] = (mse_proposed_i,nll_proposed_i)\n",
    "            model_dict[(eps,c_i,\"proposed\")] = proposed_model_i\n",
    "            \n",
    "            #plot the results\n",
    "            fig,ax = plt.subplots(1,2,figsize=(12,6))\n",
    "            plot_results(ax[0],mu_i,var_i,\"benchmark\",n=n_points)\n",
    "            plot_results(ax[1],mu_prop_i,var_prop_i,\"proposed\",n=n_points)\n",
    "            fig.suptitle(f\"eps={eps}, c= {c_i}\")\n",
    "            fig.savefig(base_dir+f\"/results_eps={eps},c={c_i}.png\")\n",
    "            plt.close(fig)\n",
    "        except Exception as e:\n",
    "            # Best-effort sweep: report the failure and continue with the next c.\n",
    "            print (f\"Error for eps={eps}, c= {c_i}\")\n",
    "            print (e)\n",
    "            # Close any figure left open by a failure during plotting or saving.\n",
    "            plt.close(\"all\")\n",
    "        print()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "222e1558",
   "metadata": {},
   "source": [
    "### Save results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ae0fa20",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per (eps, c, model_type) key of results_dict; columns 0 and 1 hold the stored pair.\n",
    "result_df = pd.DataFrame.from_dict(results_dict, orient=\"index\")\n",
    "\n",
    "# Spread the key tuples into three ordinary columns alongside the existing index.\n",
    "key_columns = pd.DataFrame(result_df.index.tolist(), index=result_df.index)\n",
    "result_df[[\"eps\", \"c\", \"model_type\"]] = key_columns\n",
    "\n",
    "metric_names = {0: \"mse\", 1: \"NLL\"}\n",
    "result_df = result_df.rename(columns=metric_names)\n",
    "result_df.to_csv(base_dir + \"/mse_nll_results.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99301956",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bundle the generated datasets and the fitted models, then pickle them under base_dir.\n",
    "metadata_path = base_dir + f'/exp_metadata.pickle'\n",
    "exp_metadata = {\n",
    "    \"data\": data_dict,\n",
    "    \"models\": model_dict,\n",
    "}\n",
    "\n",
    "with open(metadata_path, 'wb') as handle:\n",
    "    pickle.dump(exp_metadata, handle)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "19d0365b",
   "metadata": {},
   "source": [
    "### Scratch work below (not part of the experiment)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fa66fd54",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _rank_by_mse(results, model_type):\n",
    "    \"\"\"Return the rows of ``results`` for ``model_type`` with an added ``rnk_`` column.\n",
    "\n",
    "    ``rnk_`` is the rank of ``mse`` within each (eps, model_type) group, using the\n",
    "    default ``rank()`` settings. The subset is copied before the column is added, so\n",
    "    the assignment does not write into a slice of ``results`` (avoids\n",
    "    SettingWithCopyWarning).\n",
    "    \"\"\"\n",
    "    subset = results[results.model_type == model_type].copy()\n",
    "    subset[\"rnk_\"] = subset.groupby([\"eps\", \"model_type\"])[\"mse\"].rank()\n",
    "    return subset\n",
    "\n",
    "\n",
    "# Keep the rank-1 (lowest mse) row per eps for each model type.\n",
    "benchmark = _rank_by_mse(result_df, \"benchmark\")\n",
    "best_benchmark = benchmark[benchmark[\"rnk_\"]==1]\n",
    "\n",
    "proposed = _rank_by_mse(result_df, \"proposed\")\n",
    "best_proposed = proposed[proposed[\"rnk_\"]==1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1392113e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Join the selected benchmark and proposed rows on eps for a side-by-side view.\n",
    "best_results = best_benchmark.merge(\n",
    "    best_proposed,\n",
    "    on=\"eps\",\n",
    "    suffixes=(\"_bench\", \"_prop\"),\n",
    ")\n",
    "best_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6003c5a2",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dca2bc4e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3ffd43aa",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31f34e5e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "958d4d64",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f60e2c0",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a750d78c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# mu_prop_i, var_prop_i,\\\n",
    "#             y_pred_prop_i, proposed_model_i = synthetic.results_weibull_model(c_i,x_train,y_train,x_test,k,1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f18dd0a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "43dcafd6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# synthetic.results_weibull_model(c_i,x_train,y_train,x_test,k,1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "81d0b8b4",
   "metadata": {},
   "source": [
    "```python\n",
    "from scipy.special import loggamma\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "db9e6a68",
   "metadata": {},
   "source": [
    "```python\n",
    "def my_func(x):\n",
    "    a = loggamma(x-(2/1.05))\n",
    "    b = loggamma(x)\n",
    "    return a/b\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "57fed3f9",
   "metadata": {},
   "source": [
    "```python\n",
    "x = np.linspace(2,40,100)\n",
    "plt.plot(x,my_func(x))\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2099af36",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ed3d094",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "322a0def",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b8dbc4c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
