{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train agents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory shared by the whole notebook: handed to qrsrm.py through --dir, read back by\n",
    "# run_tc_simulation_from_dir, and used as the output folder for the LaTeX table and the figures.\n",
    "path_base = \"runs/time_consistency\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "import sys\n",
    "\n",
    "# Launch training with the interpreter that runs this kernel instead of whatever \"python\"\n",
    "# resolves to on PATH, so qrsrm.py sees the same environment as the notebook.\n",
    "# Falls back to \"python\" only if the interpreter path is unavailable.\n",
    "base_script = [\n",
    "    sys.executable or \"python\", \"qrsrm.py\",\n",
    "    \"--env-id\", \"TradingEnv-v0\",\n",
    "    \"--save-model\",\n",
    "    \"--n-quantiles\", \"200\",\n",
    "    \"--total-timesteps\", \"2000000\",\n",
    "    \"--dir\", path_base,\n",
    "    \"--risk-measure\", \"CVaR\",\n",
    "    \"--alpha\", \"0.5\",\n",
    "    \"--gamma\", \"0.99\",\n",
    "    \"--end-e\", \"0.05\",\n",
    "    \"--scheduler-frequency\", \"10000\",\n",
    "]\n",
    "\n",
    "# One entry per training run; add more argument lists here to train several agents.\n",
    "scripts = [base_script]\n",
    "\n",
    "# Run each script in turn. A failing run is reported and the loop moves on to the next one.\n",
    "for script in scripts:\n",
    "    print(f\"Running script {script}...\")\n",
    "    try:\n",
    "        # check=True turns a non-zero exit status into CalledProcessError.\n",
    "        subprocess.run(script, check=True)\n",
    "    except subprocess.CalledProcessError:\n",
    "        print(f\"Script {script} failed!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Run the simulations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from utils import AGENT_NAME_MAP, run_tc_simulation_from_dir\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "import warnings\n",
    "\n",
    "# NOTE(review): presumably a workaround for the duplicate OpenMP runtime error - confirm it is still needed.\n",
    "os.environ[\"KMP_DUPLICATE_LIB_OK\"] = \"TRUE\"\n",
    "\n",
    "# path_base is defined in the first code cell of the notebook and reused here as-is.\n",
    "\n",
    "# Silence the warning categories that would otherwise flood the cell outputs.\n",
    "for _category in (UserWarning, DeprecationWarning, RuntimeWarning):\n",
    "    warnings.filterwarnings('ignore', category=_category)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Settings forwarded to the simulator (presumably the number of simulated episodes and the RNG seed).\n",
    "Nsimulations = 20\n",
    "sim_seed = 1\n",
    "\n",
    "# The helper returns four collections that the later cells index by simulation number.\n",
    "prices_tcs, quantiles_tcs, actions_tcs, rewards_tcs = run_tc_simulation_from_dir(\n",
    "    f\"{path_base}/\",\n",
    "    Nsimulations=Nsimulations,\n",
    "    sim_seed=sim_seed,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Plot the figures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython import display\n",
    "%matplotlib inline\n",
    "\n",
    "from matplotlib import rcParams, ticker\n",
    "\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Inline figure format; an empty string keeps the backend default.\n",
    "backend_format = \"retina\"  # @param [\"retina\", \"\"]\n",
    "%config InlineBackend.figure_format = backend_format\n",
    "\n",
    "# seaborn theme first, then the matplotlib overrides below take precedence.\n",
    "sns.set_context(\"notebook\")\n",
    "sns.set_style(\"ticks\")\n",
    "\n",
    "rcParams.update({\n",
    "    # Axes: ticks on the right side as well, rounded limits, no extra margins.\n",
    "    'ytick.right': True,\n",
    "    'axes.autolimit_mode': 'round_numbers',\n",
    "    'axes.xmargin': 0,\n",
    "    'axes.ymargin': 0,\n",
    "    # Default figure size and resolution.\n",
    "    'figure.figsize': [8, 5],\n",
    "    'figure.dpi': 150,\n",
    "    # Font type 42 embeds TrueType fonts in the PDF/PS output.\n",
    "    'pdf.fonttype': 42,\n",
    "    'ps.fonttype': 42,\n",
    "})\n",
    "\n",
    "# Ten-colour palette indexed by timestep in the plotting cells.\n",
    "colors = sns.color_palette(n_colors=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "iii = 11  # index of the simulation to inspect\n",
    "alpha = 0.5  # same value as the --alpha argument given to qrsrm.py in the training cell\n",
    "Ndt = quantiles_tcs[0].shape[0]\n",
    "\n",
    "# Equal weight on every quantile; y holds the cumulative weights used as CDF levels.\n",
    "probs = np.ones_like(quantiles_tcs[iii][0, :]) / len(quantiles_tcs[iii][0, :])\n",
    "y = np.cumsum(probs)\n",
    "# Threshold fixed at t=0: the alpha-quantile of the initial quantile estimates.\n",
    "lambda_0 = np.quantile(quantiles_tcs[iii][0, :], alpha)\n",
    "\n",
    "# CDF level at which lambda_0 sits in each later distribution; at t=0 it is alpha by construction.\n",
    "alphas_tc = [alpha]\n",
    "for timestep in range(1, Ndt):\n",
    "    # Columns 3 and 4 of prices_tcs are the ones labelled $s_t$ and $c_t$ in the table below.\n",
    "    x = prices_tcs[iii][timestep, 3] + prices_tcs[iii][timestep, 4]*quantiles_tcs[iii][timestep, :]\n",
    "    # NOTE(review): np.interp expects x to be increasing - confirm the quantiles come out sorted.\n",
    "    alphas_tc.append(np.interp(lambda_0, x, y))\n",
    "# Pad with a final 0.0 so the list has one entry per table column (Ndt + 1).\n",
    "alphas_tc.append(0.0)\n",
    "\n",
    "steps = range(Ndt+1)\n",
    "df = pd.DataFrame(data=prices_tcs[iii].transpose(), columns=steps, index=[\"$P_t$\", \"$q_t$\", \"$t$\", \"$s_t$\", \"$c_t$\"])\n",
    "df = pd.concat(\n",
    "    [\n",
    "        df,\n",
    "        # Raw string: in a normal string literal \"\\a\" is the BEL control character, which corrupts the label.\n",
    "        pd.Series(alphas_tc, index=steps, name=r\"$\\alpha_t$\").to_frame().T,\n",
    "        pd.Series(actions_tcs[iii].transpose(), index=steps, name=\"$a_t$\").to_frame().T,\n",
    "        pd.Series(rewards_tcs[iii].transpose(), index=steps, name=\"$r_t$\").to_frame().T,\n",
    "    ],\n",
    "    axis=0,\n",
    ")\n",
    "\n",
    "# One centred column per timestep, derived from Ndt so the LaTeX spec always matches the table width.\n",
    "df.to_latex(path_base + \"/prices_tc.tex\", column_format='l|' + 'c' * (Ndt + 1), float_format=\"%.3f\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(8, 5))\n",
    "\n",
    "# CDF of the quantile estimates at t=0, with a marker where it crosses the threshold lambda_0.\n",
    "x = quantiles_tcs[iii][0, :]\n",
    "ax.plot(x, y, label='t=0', linewidth=2.5, color=colors[0], zorder=3)\n",
    "ax.scatter(lambda_0, np.interp(lambda_0, x, y), color=colors[0], marker='o', zorder=2, s=3)\n",
    "\n",
    "# One CDF per later timestep of s_t + c_t * quantiles, each with its own crossing marker.\n",
    "# This cell only draws: alphas_tc from the table cell is left untouched.\n",
    "for timestep in range(1, Ndt):\n",
    "    # Wrap around the palette so more timesteps than colours cannot raise an IndexError.\n",
    "    step_color = colors[timestep % len(colors)]\n",
    "    x = prices_tcs[iii][timestep, 3] + prices_tcs[iii][timestep, 4]*quantiles_tcs[iii][timestep, :]\n",
    "    ax.plot(x, y, alpha=0.5, label='t='+str(timestep), zorder=1, color=step_color)\n",
    "    ax.scatter(lambda_0, np.interp(lambda_0, x, y), marker='o', zorder=2, s=5, color=step_color)\n",
    "\n",
    "# Vertical line at the threshold shared by all timesteps.\n",
    "ax.axvline(x=lambda_0, linestyle='solid', color='black', linewidth=0.5, label=r'$\\lambda_\\alpha$')\n",
    "\n",
    "# Show the full frame around the axes.\n",
    "for side in ('right', 'top', 'left', 'bottom'):\n",
    "    ax.spines[side].set_visible(True)\n",
    "ax.set_xlabel(\"Discounted Future Rewards\")\n",
    "ax.set_ylabel(\"Probability\")\n",
    "\n",
    "# Thicker, semi-transparent legend handles so the faint curves stay readable in the legend.\n",
    "legend = ax.legend(loc='upper left', title='', frameon=False)\n",
    "plt.setp(legend.get_lines(), linewidth=3, alpha=0.5)\n",
    "\n",
    "fig.set_facecolor('white')\n",
    "fig.tight_layout()\n",
    "\n",
    "fig.savefig(path_base + '/comparison_scG(xsca).pdf', transparent=True)\n",
    "fig.savefig(path_base + '/comparison_scG(xsca).eps', format='eps', dpi=1200)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(8, 5))\n",
    "\n",
    "# CDF of the raw quantile estimates at t=0 (emphasised), then one fainter curve per later timestep.\n",
    "x = quantiles_tcs[iii][0, :]\n",
    "ax.plot(x, y, label='t=0', linewidth=2.5, color=colors[0], zorder=2)\n",
    "for timestep in range(1, Ndt):\n",
    "    x = quantiles_tcs[iii][timestep, :]\n",
    "    # Wrap around the palette so more timesteps than colours cannot raise an IndexError.\n",
    "    ax.plot(x, y, alpha=0.5, label='t='+str(timestep), zorder=1, color=colors[timestep % len(colors)])\n",
    "\n",
    "# Show the full frame around the axes.\n",
    "for side in ('right', 'top', 'left', 'bottom'):\n",
    "    ax.spines[side].set_visible(True)\n",
    "ax.set_xlabel(\"Discounted Future Rewards\")\n",
    "ax.set_ylabel(\"Probability\")\n",
    "\n",
    "# Thicker, semi-transparent legend handles so the faint curves stay readable in the legend.\n",
    "legend = ax.legend(loc='upper left', title='', frameon=False)\n",
    "plt.setp(legend.get_lines(), linewidth=3, alpha=0.5)\n",
    "\n",
    "fig.set_facecolor('white')\n",
    "fig.tight_layout()\n",
    "\n",
    "fig.savefig(path_base + '/comparison_G(xsca).pdf', transparent=True)\n",
    "fig.savefig(path_base + '/comparison_G(xsca).eps', format='eps', dpi=1200)\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "CleanRL",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
