{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Mountaincar Environment\n",
    "* Eval can start anywhere from left to goal state, vel 0 (also training). They need 71 episodes\n",
    "* Modify cartpole to only have two actions-> left and right. The magnitude of the actions are much larger in nfq paper\n",
    "* Hint to goal, which sometimes makes the agent perform worse\n",
    "* Group: the magnitude of the action\n",
    "* Made the forces symmetric"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import configargparse\n",
    "import torch\n",
    "import torch.optim as optim\n",
    "import sys\n",
    "sys.path.append('../')\n",
    "\n",
    "from environments import MountainCarEnv, Continuous_MountainCarEnv\n",
    "from models.agents import NFQAgent\n",
    "from models.networks import NFQNetwork, ContrastiveNFQNetwork\n",
    "from util import get_logger, close_logger, load_models, make_reproducible, save_models\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import itertools\n",
    "import seaborn as sns\n",
    "import tqdm\n",
    "import json\n",
    "from train_mountaincar import fqi, warm_start, transfer_learning\n",
    "import scipy\n",
    "import json\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Running experiments"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## \"Structureless Test\"\n",
    "* The dynamics of the systems are actually the same. Does CFQI learn a difference?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from train_mountaincar import fqi, warm_start, transfer_learning\n",
    "num_iter=4\n",
    "perf_foreground = []\n",
    "perf_background = []\n",
    "for i in range(num_iter):\n",
    "    print(str(i))\n",
    "    perf_bg, perf_fg = fqi(epoch=1500, gravity=0.0025, verbose=True, is_contrastive=True, structureless=True, hint_to_goal=False)\n",
    "    perf_foreground.append(perf_fg)\n",
    "    perf_background.append(perf_bg)\n",
    "sns.distplot(perf_foreground, label='Foreground Performance')\n",
    "sns.distplot(perf_background, label='Background Performance')\n",
    "plt.legend()\n",
    "plt.xlabel(\"Average Reward Earned\")\n",
    "plt.title(\"Dynamics are the same in fg and bg environments\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## \"Performance when force left is different\"\n",
    "* We change the gravity on the foreground environments. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0025\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 2/1501 [00:00<11:33,  2.16it/s]\n",
      "  4%|▍         | 59/1501 [00:29<11:54,  2.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.003\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  4%|▎         | 54/1501 [00:19<08:53,  2.71it/s]\n",
      "  4%|▍         | 62/1501 [00:30<11:37,  2.06it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0035\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  1%|          | 18/1501 [00:07<10:58,  2.25it/s]\n",
      "  7%|▋         | 107/1501 [00:48<10:27,  2.22it/s]\n"
     ]
    }
   ],
   "source": [
    "num_iter=2\n",
    "results = {}\n",
    "for i in range(0, 5):\n",
    "    results[i] = {}\n",
    "    results[i]['cfqi'] = {}\n",
    "    results[i]['fqi'] = {}\n",
    "    results[i]['warm_start'] = {}\n",
    "    results[i]['transfer_learning'] = {}\n",
    "    \n",
    "initial_g = 0.0025\n",
    "for i in range(num_iter):\n",
    "    for f in range(0, 5):\n",
    "        gravity = initial_g + f*0.0005\n",
    "        print(str(gravity))\n",
    "        perf_bg, perf_fg = fqi(epoch=1500, verbose=False, is_contrastive=True, structureless=True, gravity=gravity, deep=True, hint_to_goal=False)\n",
    "        results[f]['cfqi'][i] = (perf_fg, perf_bg)\n",
    "        \n",
    "        perf_bg, perf_fg = fqi(epoch=1500, verbose=False, is_contrastive=False, structureless=True, gravity=gravity, deep=True, hint_to_goal=False)\n",
    "        results[f]['fqi'][i] = (perf_fg, perf_bg)\n",
    "        \n",
    "        perf_bg, perf_fg = warm_start(epoch=1500, verbose=False, structureless=True, gravity=gravity)\n",
    "        results[f]['warm_start'][i] = (perf_fg, perf_bg)\n",
    "        \n",
    "        perf_bg, perf_fg = transfer_learning(epoch=1500, verbose=False, structureless=True, gravity=gravity)\n",
    "        results[f]['transfer_learning'][i] = (perf_fg, perf_bg)\n",
    "        \n",
    "        \n",
    "        \n",
    "        \n",
    "    with open('gravity_v_performance.json', 'w') as f:\n",
    "        json.dump(results, f) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def mean_confidence_interval(data, confidence=0.95):\n",
    "    a = 1.0 * np.array(data)\n",
    "    n = len(a)\n",
    "    m, se = np.mean(a), scipy.stats.sem(a)\n",
    "    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)\n",
    "    return m, h\n",
    "\n",
    "def plot_performance(results, x, ds='bg'):\n",
    "    c_success = []\n",
    "    f_success = []\n",
    "    w_success = []\n",
    "    t_success = []\n",
    "    c_errs = []\n",
    "    f_errs = []\n",
    "    w_errs = []\n",
    "    t_errs = []\n",
    "    if ds == 'bg':\n",
    "        ind = 1\n",
    "    else:\n",
    "        ind = 0\n",
    "    for i in range(0, 5):\n",
    "        cfqi_perf = []\n",
    "        fqi_perf = []\n",
    "        ws_perf = []\n",
    "        tl_perf = []\n",
    "        for key in results[i]['fqi']:\n",
    "            fqi_perf.append(results[i]['fqi'][key][ind])\n",
    "        for key in results[i]['cfqi']:\n",
    "            cfqi_perf.append(results[i]['cfqi'][key][ind])\n",
    "        for key in results[i]['warm_start']:\n",
    "            ws_perf.append(results[i]['warm_start'][key][ind])\n",
    "        for key in results[i]['transfer_learning']:\n",
    "            tl_perf.append(results[i]['transfer_learning'][key][ind])\n",
    "\n",
    "        c_success.append(np.mean(cfqi_perf))\n",
    "        f_success.append(np.mean(fqi_perf))\n",
    "        w_success.append(np.mean(ws_perf))\n",
    "        t_success.append(np.mean(tl_perf))\n",
    "        m, h = mean_confidence_interval(cfqi_perf)\n",
    "        c_errs.append(h)\n",
    "        m, h = mean_confidence_interval(fqi_perf)\n",
    "        f_errs.append(h)\n",
    "        m, h = mean_confidence_interval(ws_perf)\n",
    "        w_errs.append(h)\n",
    "        m, h = mean_confidence_interval(tl_perf)\n",
    "        t_errs.append(h) \n",
    "\n",
    "    plt.figure(figsize=(10, 4))\n",
    "    sns.scatterplot(x, c_success, label='CFQI')\n",
    "    plt.errorbar(x, c_success ,yerr=c_errs, linestyle=\"None\")\n",
    "    sns.scatterplot(x, f_success, label='FQI')\n",
    "    plt.errorbar(x, f_success ,yerr=f_errs, linestyle=\"None\")\n",
    "    sns.scatterplot(x, w_success, label='Warm Start')\n",
    "    plt.errorbar(x, w_success ,yerr=w_errs, linestyle=\"None\")\n",
    "    sns.scatterplot(x, t_success, label='Transfer Learning')\n",
    "    plt.errorbar(x, t_success ,yerr=t_errs, linestyle=\"None\")\n",
    "    if ds == 'bg':\n",
    "        plt.title(\"Background Dataset: Performance of CFQI, FQI, Warm Start, Transfer Learning when gravity is modified\")\n",
    "    else:\n",
    "        plt.title(\"Foreground Dataset: Performance of CFQI, FQI, Warm Start, Transfer Learning when gravity is modified\")\n",
    "    plt.xlabel(\"Gravity\")\n",
    "    plt.ylabel(\"Reward\")\n",
    "    plt.show()  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "initial_gravity = 0.0025\n",
    "x = []\n",
    "for i in range(5):\n",
    "    x.append(initial_gravity + i*0.005)\n",
    "plot_performance(results, x, ds='bg')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Group imbalance test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_iter = 2\n",
    "results = {}\n",
    "\n",
    "GRAVITY = 0.004\n",
    "\n",
    "total_samples = 400\n",
    "fg_sample_fractions = [0.1 * x for x in np.arange(1, 6)]\n",
    "\n",
    "for i in fg_sample_fractions:\n",
    "    results[i] = {}\n",
    "    results[i][\"fg_only\"] = {}\n",
    "    results[i][\"cfqi\"] = {}\n",
    "    results[i][\"fqi_joint\"] = {}\n",
    "    \n",
    "for i in range(num_iter):\n",
    "\n",
    "    for fg_sample_fraction in fg_sample_fractions:\n",
    "\n",
    "        n_fg = int(total_samples * fg_sample_fraction)\n",
    "        n_bg = int(total_samples - n_fg)\n",
    "        \n",
    "        # Only train/test on small set of foreground samples\n",
    "        perf_bg, perf_fg = fqi(epoch=1500, verbose=False, is_contrastive=True, structureless=False, gravity=GRAVITY, fg_only=True, init_experience_bg=n_fg // 2,\n",
    "            init_experience_fg=n_fg // 2)\n",
    "        results[fg_sample_fraction][\"fg_only\"][i] = (perf_bg, perf_fg)\n",
    "\n",
    "        # Use contrastive model with larger pool of background samples\n",
    "        perf_bg, perf_fg = fqi(epoch=1500, is_contrastive=True,init_experience_bg=n_bg,init_experience_fg=n_fg,fg_only=False,verbose=False,gravity=GRAVITY)\n",
    "        results[fg_sample_fraction][\"cfqi\"][i] = (perf_bg, perf_fg)\n",
    "\n",
    "        # Use non-contrastive model with larger pool of background samples\n",
    "        perf_bg, perf_fg = fqi(is_contrastive=False,init_experience_bg=n_bg,init_experience_fg=n_fg,fg_only=False,gravity=GRAVITY,epoch=1500,verbose=False,)\n",
    "        results[fg_sample_fraction][\"fqi_joint\"][i] = (perf_bg, perf_fg)\n",
    "\n",
    "        with open(\"class_imbalance_cfqi.json\", \"w\") as f:\n",
    "            json.dump(results, f)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "research [~/.conda/envs/research/]",
   "language": "python",
   "name": "conda_research"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
