{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train agents"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root directory of this experiment: handed to the training scripts through --dir and\n",
    "# used as the location of the cached DataFrames and the saved figures.\n",
    "path_base = \"runs/quantile_number\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "import sys\n",
    "\n",
    "# Launch the training scripts with the interpreter that runs this kernel instead of whatever\n",
    "# \"python\" resolves to on PATH, so they use the same environment as the notebook.\n",
    "# Fall back to \"python\" in the rare case where sys.executable is empty.\n",
    "python_cmd = sys.executable or \"python\"\n",
    "\n",
    "# Command-line options shared by every training run; --dir points the scripts at `path_base`.\n",
    "shared_hyperparameters = [\"--env-id\", \"TradingEnv-v0\", \"--save-model\", \"--dir\", path_base]\n",
    "\n",
    "base_script1 = [python_cmd, \"qrsrm.py\", \"--risk-measure\", \"CVaR\", \"--alpha\", \"1.0\"] + shared_hyperparameters\n",
    "base_script2 = [python_cmd, \"qrdqn.py\"] + shared_hyperparameters\n",
    "\n",
    "# Numbers of quantiles swept for each of the two scripts.\n",
    "n_quantiles1 = [\"10\", \"30\", \"50\", \"70\", \"100\", \"200\"]\n",
    "n_quantiles2 = [\"50\", \"200\"]\n",
    "\n",
    "scripts1 = [base_script1 + [\"--n-quantiles\", nq] for nq in n_quantiles1]\n",
    "scripts2 = [base_script2 + [\"--n-quantiles\", nq] for nq in n_quantiles2]\n",
    "\n",
    "scripts = scripts1 + scripts2\n",
    "\n",
    "# Repeat every configuration once per seed.\n",
    "seeds = [\"1\", \"2\", \"3\", \"4\", \"5\"]\n",
    "scripts = [script + [\"--seed\", seed] for script in scripts for seed in seeds]\n",
    "\n",
    "# Run the commands one after another; a failing run is reported and the sweep continues.\n",
    "for script in scripts:\n",
    "    print(f\"Running script {script}...\")\n",
    "    try:\n",
    "        subprocess.run(script, check=True)\n",
    "    except subprocess.CalledProcessError:\n",
    "        print(f\"Script {script} failed!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Run the simulations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from utils import run_simulation_from_dir, add_columns_quantile, make_agent_hue_kws\n",
    "from utils import load_data_from_dir, load_theta_diff_from_dir, smooth_dataframe\n",
    "from utils import AGENT_NAME_MAP\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os\n",
    "# NOTE(review): presumably a workaround for a duplicate OpenMP runtime error -- confirm it is still needed.\n",
    "os.environ[\"KMP_DUPLICATE_LIB_OK\"]=\"TRUE\"\n",
    "\n",
    "import warnings\n",
    "# Silence these warning categories for the rest of the session. This also hides RuntimeWarnings\n",
    "# raised by numerical code in later cells, so remove the filters when debugging.\n",
    "warnings.filterwarnings('ignore', category=UserWarning)\n",
    "warnings.filterwarnings('ignore', category=DeprecationWarning)\n",
    "warnings.filterwarnings('ignore', category=RuntimeWarning)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO (original author): wait for pandas update or switch to CleanRL env\n",
    "path_base = \"runs/quantile_number\"\n",
    "Nsimulations = 10000\n",
    "sim_seed = 1\n",
    "pkl_path = path_base + \"/df_exp.pkl\"\n",
    "\n",
    "# Run the simulations only when no cached pickle is present in `path_base`.\n",
    "if \"df_exp.pkl\" not in os.listdir(path_base):\n",
    "    run_simulation_from_dir(\n",
    "        path_base + \"/\", Nsimulations=Nsimulations, sim_seed=sim_seed\n",
    "    ).to_pickle(pkl_path)\n",
    "\n",
    "# Always continue from the pickled copy, whether it was just written or already existed.\n",
    "df_exp = (\n",
    "    pd.read_pickle(pkl_path)\n",
    "    .pipe(add_columns_quantile)\n",
    "    .sort_values(by=['agent', 'risk_measure', 'alpha', 'n_quantile', 'environment_name', 'agent_seed'])\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "df_exp.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "path_base = \"runs/quantile_number\"\n",
    "pkl_path = path_base + \"/df_theta.pkl\"\n",
    "\n",
    "# Build the cache on first use; afterwards the pickle is the only source of the data.\n",
    "if \"df_theta.pkl\" not in os.listdir(path_base):\n",
    "    load_theta_diff_from_dir(path_base + \"/\").to_pickle(pkl_path)\n",
    "\n",
    "df_theta = pd.read_pickle(pkl_path)\n",
    "# Copy 'value' into 'diff' before smooth_dataframe runs\n",
    "# (presumably to keep the unsmoothed series available -- confirm against utils).\n",
    "df_theta['diff'] = df_theta['value']\n",
    "\n",
    "# Drop the first row of every group defined by these keys.\n",
    "theta_run_keys = ['environment_name', 'agent', 'risk_measure', 'alpha', 'n_quantile', 'agent_seed']\n",
    "df_theta = df_theta.groupby(theta_run_keys).apply(lambda run: run.iloc[1:]).reset_index(drop=True)\n",
    "\n",
    "df_theta = (\n",
    "    df_theta\n",
    "    .pipe(add_columns_quantile)\n",
    "    .pipe(smooth_dataframe)\n",
    "    .sort_values(by=['agent', 'risk_measure', 'alpha', 'n_quantile', 'environment_name', 'agent_seed', 'wall_time'])\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "df_theta.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "path_base = \"runs/quantile_number\"\n",
    "pkl_path = path_base + \"/df_training.pkl\"\n",
    "\n",
    "# Build the cache on first use; afterwards the pickle is the only source of the data.\n",
    "if \"df_training.pkl\" not in os.listdir(path_base):\n",
    "    load_data_from_dir(path_base + \"/\").to_pickle(pkl_path)\n",
    "\n",
    "df_training = pd.read_pickle(pkl_path).pipe(add_columns_quantile).pipe(smooth_dataframe)\n",
    "\n",
    "# Rounded wall-clock time, used later as a grouping key for the time-based plot.\n",
    "df_training['wall_time_rounded'] = df_training['wall_time'].round()\n",
    "\n",
    "training_sort_keys = ['agent', 'risk_measure', 'alpha', 'n_quantile', 'environment_name', 'agent_seed', 'wall_time']\n",
    "df_training = df_training.sort_values(by=training_sort_keys).reset_index(drop=True)\n",
    "df_training.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Plot the figures"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython import display\n",
    "%matplotlib inline\n",
    "\n",
    "from matplotlib import ticker\n",
    "from matplotlib import rcParams\n",
    "\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Figure format used by the inline backend.\n",
    "backend_format = \"retina\"  # @param [\"retina\", \"\"]\n",
    "%config InlineBackend.figure_format = backend_format\n",
    "\n",
    "sns.set_context(\"notebook\")\n",
    "sns.set_style(\"ticks\")\n",
    "\n",
    "# matplotlib defaults shared by every figure below; applied after the seaborn theme calls.\n",
    "for rc_key, rc_value in {\n",
    "    'ytick.right': True,\n",
    "    'axes.autolimit_mode': 'round_numbers',\n",
    "    'axes.xmargin': 0,\n",
    "    'axes.ymargin': 0,\n",
    "    'figure.figsize': [8, 5],\n",
    "    'figure.dpi': 150,\n",
    "    'pdf.fonttype': 42,\n",
    "    'ps.fonttype': 42,\n",
    "}.items():\n",
    "    rcParams[rc_key] = rc_value\n",
    "\n",
    "# Colour list indexed by the experiment cells, and the figure size passed to plt.subplots.\n",
    "colors = sns.color_palette(n_colors=10)\n",
    "fig_size = (8, 5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "index_columns = ['agent', 'risk_measure', 'alpha', 'sim_seed', 'Model']\n",
    "columns = ['rewards']\n",
    "\n",
    "def mean_value(group):\n",
    "    \"\"\"Return the mean of `group`; used below as the aggregation function.\"\"\"\n",
    "    return group.mean()\n",
    "\n",
    "# Mean reward per `index_columns` combination; the result column is labelled with the\n",
    "# LaTeX expectation symbol.\n",
    "expectation_label = r\"$\\mathbb{E}$\"\n",
    "df_grouped = df_exp.groupby(index_columns)[columns].agg([(expectation_label, mean_value)])\n",
    "\n",
    "# Flatten the result: remove the 'rewards' column level and every index level except the\n",
    "# last one ('Model').\n",
    "df_grouped.columns = df_grouped.columns.droplevel(0)\n",
    "df_grouped.index = df_grouped.index.droplevel([0, 1, 2, 3])\n",
    "df_grouped"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Agents shown in the reward histogram; the list position selects the entry of `colors`.\n",
    "agent_id_ordered_by_performance = [\n",
    "    'qrdqn_200', 'qrsrm_200', 'qrsrm_100', 'qrsrm_70', 'qrsrm_50', 'qrsrm_30', 'qrsrm_10',\n",
    "]\n",
    "\n",
    "# An agent id reads '<agent>_<n_quantiles>'; the display name is the AGENT_NAME_MAP entry of\n",
    "# the agent followed by '(N=<n_quantiles>)'.\n",
    "experiments_ordered_by_performance = [\n",
    "    {\n",
    "        \"agent_id\": agent_id,\n",
    "        \"agent_name\": f\"{AGENT_NAME_MAP[agent_id.split('_')[0]]}(N={agent_id.split('_')[1]})\",\n",
    "        \"color\": colors[i],\n",
    "    }\n",
    "    for i, agent_id in enumerate(agent_id_ordered_by_performance)\n",
    "]\n",
    "\n",
    "agent_names, hue_kws = make_agent_hue_kws(experiments_ordered_by_performance)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=fig_size)\n",
    "\n",
    "# Reward distribution per model: density histogram (normalised per model) with a KDE curve.\n",
    "# The palette comes from hue_kws, like the other plots and the mean markers drawn below;\n",
    "# passing the full `colors` list would supply more colours than there are hue levels.\n",
    "sns.histplot(data=df_exp[df_exp['Model'].isin(agent_names)],\n",
    "             x=\"rewards\",\n",
    "             hue=\"Model\",\n",
    "             stat=\"density\",\n",
    "             common_norm=False,\n",
    "             alpha=0.5,\n",
    "             bins=200,\n",
    "             kde=True,\n",
    "             hue_order=agent_names,\n",
    "             palette=hue_kws['color'],\n",
    "             legend='brief',\n",
    "             ax=ax,\n",
    "    )\n",
    "\n",
    "# Mark the mean reward of every plotted model with a solid vertical line in the model's colour.\n",
    "for agent, value in df_grouped.iterrows():\n",
    "    if agent in agent_names:\n",
    "        for v in value:\n",
    "            ax.vlines(x=v,\n",
    "                      ymin=0,\n",
    "                      ymax=0.8,\n",
    "                      colors=hue_kws['color'][agent_names.index(agent)],\n",
    "                      linestyles='solid',\n",
    "                      alpha=1,\n",
    "                      linewidth=1,\n",
    "                      )\n",
    "\n",
    "ax.set_title('')\n",
    "for side in ('right', 'top', 'left', 'bottom'):\n",
    "    ax.spines[side].set_visible(True)\n",
    "ax.set_xlabel('Discounted Future Rewards')\n",
    "ax.set_ylabel('Density')\n",
    "ax.set_xlim([-4, 6])\n",
    "\n",
    "# Restyle the legend that seaborn created: no title, no frame.\n",
    "# NOTE(review): the original carried a 'move to upper left' remark without code; the legend\n",
    "# position is left unchanged here.\n",
    "legend = ax.get_legend()\n",
    "legend.set_title('')\n",
    "legend.set_frame_on(False)\n",
    "\n",
    "plt.setp(legend.get_lines(), linewidth=3, alpha=0.5)\n",
    "\n",
    "fig.set_facecolor('white')\n",
    "fig.tight_layout()\n",
    "\n",
    "plt.savefig(path_base + '/comparison_quantile_number.pdf', transparent=True)\n",
    "plt.savefig(path_base + '/comparison_quantile_number.eps', format='eps', dpi=1200)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The two agents compared in the CVaR plot; the list position selects the entry of `colors`.\n",
    "agent_id_ordered_by_performance2 = ['qrdqn_200', 'qrsrm_200']\n",
    "\n",
    "# An agent id reads '<agent>_<n_quantiles>'; the display name is the AGENT_NAME_MAP entry of\n",
    "# the agent followed by '(N=<n_quantiles>)'.\n",
    "experiments_ordered_by_performance2 = [\n",
    "    {\n",
    "        \"agent_id\": agent_id,\n",
    "        \"agent_name\": f\"{AGENT_NAME_MAP[agent_id.split('_')[0]]}(N={agent_id.split('_')[1]})\",\n",
    "        \"color\": colors[i],\n",
    "    }\n",
    "    for i, agent_id in enumerate(agent_id_ordered_by_performance2)\n",
    "]\n",
    "\n",
    "agent_names2, hue_kws2 = make_agent_hue_kws(experiments_ordered_by_performance2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=fig_size)\n",
    "\n",
    "def create_func(i):\n",
    "    \"\"\"Return an aggregator computing the empirical lower-tail CVaR at level `i`.\n",
    "\n",
    "    The aggregator averages all samples at or below the empirical `i`-quantile. The\n",
    "    comparison is inclusive so that level 0 yields the minimum (a strict comparison\n",
    "    selects nothing and gives NaN) and level 1 yields the plain mean of all samples.\n",
    "    \"\"\"\n",
    "    return lambda x: np.mean(x[x <= np.quantile(x, i)])\n",
    "\n",
    "# Evaluate the CVaR on a grid of 1001 levels in [0, 1] for every (agent_id, Model) group;\n",
    "# the (name, function) tuples make each level the name of its result column.\n",
    "q = np.linspace(0, 1, 1001)\n",
    "funcs = [(i, create_func(i)) for i in q]\n",
    "\n",
    "df_cvar = df_exp.groupby(['agent_id', 'Model'])['rewards'].agg(funcs).reset_index()\n",
    "# Long format: one row per (agent_id, Model, alpha) holding the corresponding CVaR value.\n",
    "melted_df = df_cvar.melt(id_vars=['agent_id', 'Model'], var_name='alpha', value_name='cvar_value')\n",
    "melted_df_selected = melted_df[melted_df['agent_id'].isin(['qrdqn_200', 'qrsrm_200'])]\n",
    "\n",
    "sns.lineplot(data=melted_df_selected,\n",
    "             x='alpha',\n",
    "             y='cvar_value',\n",
    "             hue=\"Model\",\n",
    "             alpha=1,\n",
    "             hue_order=agent_names2,\n",
    "             palette=hue_kws2['color'],\n",
    "             legend='brief',\n",
    "             ax=ax,\n",
    "    )\n",
    "\n",
    "ax.set_title('')\n",
    "for side in ('right', 'top', 'left', 'bottom'):\n",
    "    ax.spines[side].set_visible(True)\n",
    "ax.set_xlabel(r\"$\\alpha$\")\n",
    "ax.set_ylabel(r\"$\\operatorname{CVaR}_{\\alpha}(G)$\")\n",
    "\n",
    "ax.yaxis.set_major_locator(ticker.MaxNLocator(nbins=10))\n",
    "ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=10))\n",
    "\n",
    "legend = ax.legend(loc='lower right', title='', frameon=False)\n",
    "plt.setp(legend.get_lines(), linewidth=3, alpha=0.5)\n",
    "\n",
    "fig.set_facecolor('white')\n",
    "fig.tight_layout()\n",
    "\n",
    "plt.savefig(path_base + '/comparison_cvar_mean.pdf', transparent=True)\n",
    "plt.savefig(path_base + '/comparison_cvar_mean.eps', format='eps', dpi=1200)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convergence "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Agents shown in the convergence plots; the list position selects the entry of `colors`.\n",
    "# This rebinds the names defined for the histogram above.\n",
    "agent_id_ordered_by_performance = [\n",
    "    'qrdqn_50', 'qrdqn_200',\n",
    "    'qrsrm_200', 'qrsrm_100', 'qrsrm_70', 'qrsrm_50', 'qrsrm_30', 'qrsrm_10',\n",
    "]\n",
    "\n",
    "# An agent id reads '<agent>_<n_quantiles>'; the display name is the AGENT_NAME_MAP entry of\n",
    "# the agent followed by '(N=<n_quantiles>)'.\n",
    "experiments_ordered_by_performance = [\n",
    "    {\n",
    "        \"agent_id\": agent_id,\n",
    "        \"agent_name\": f\"{AGENT_NAME_MAP[agent_id.split('_')[0]]}(N={agent_id.split('_')[1]})\",\n",
    "        \"color\": colors[i],\n",
    "    }\n",
    "    for i, agent_id in enumerate(agent_id_ordered_by_performance)\n",
    "]\n",
    "\n",
    "agent_names, hue_kws = make_agent_hue_kws(experiments_ordered_by_performance)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=fig_size)\n",
    "\n",
    "# Keep every 100th row of each group defined by these keys to thin the curves.\n",
    "training_run_keys = ['environment_name', 'agent', 'risk_measure', 'alpha', 'n_quantile', 'agent_seed']\n",
    "df_training_selected = df_training.groupby(training_run_keys).apply(lambda run: run.iloc[::100]).reset_index(drop=True)\n",
    "\n",
    "# Normalisation anchors: `random_mean` maps to 0 % and `dqn_best` to 100 %.\n",
    "# `dqn_best` is the mean 'value' of the last 10 rows of the qrdqn / n_quantile == 50 selection.\n",
    "random_mean = -8\n",
    "is_qrdqn_50 = (df_training_selected['agent'] == 'qrdqn') & (df_training_selected['n_quantile'] == 50)\n",
    "dqn_best = df_training_selected[is_qrdqn_50]['value'].iloc[-10:].mean()\n",
    "\n",
    "training_steps_k = df_training_selected[\"step\"]/1000.0\n",
    "normalized_score = 100.0*(df_training_selected[\"value\"] - random_mean)/(dqn_best - random_mean)\n",
    "\n",
    "sns.lineplot(\n",
    "    data=df_training_selected,\n",
    "    x=training_steps_k,\n",
    "    y=normalized_score,\n",
    "    hue=\"Model\",\n",
    "    alpha=0.7,\n",
    "    linewidth=3,\n",
    "    hue_order=agent_names,\n",
    "    palette=hue_kws['color'],\n",
    "    legend='brief',\n",
    "    ax=ax,\n",
    ")\n",
    "ax.set_title('')\n",
    "# All spines are shown except the top one.\n",
    "for side, visible in (('right', True), ('top', False), ('left', True), ('bottom', True)):\n",
    "    ax.spines[side].set_visible(visible)\n",
    "ax.set_xlabel('Step (Thousands)')\n",
    "ax.set_ylabel('Normalized Score')\n",
    "\n",
    "ax.yaxis.set_major_locator(ticker.MaxNLocator(nbins=10))\n",
    "ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=5))\n",
    "ax.yaxis.set_major_formatter(ticker.PercentFormatter())\n",
    "\n",
    "legend = ax.legend(loc='lower right', title='', frameon=False)\n",
    "plt.setp(legend.get_lines(), linewidth=3, alpha=0.5)\n",
    "\n",
    "fig.set_facecolor('white')\n",
    "fig.tight_layout()\n",
    "plt.savefig(path_base + '/performance_step.pdf', transparent=True)\n",
    "plt.savefig(path_base + '/performance_step.eps', format='eps', dpi=1200)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Average the numeric columns of all rows that share the grouping keys and a rounded\n",
    "# wall-clock time. numeric_only=True is explicit because pandas >= 2.0 raises on leftover\n",
    "# non-numeric columns instead of dropping them as older versions did.\n",
    "df_training_time = df_training.groupby(['environment_name', 'environment_pretty', 'agent', 'agent_id', 'risk_measure', 'alpha', 'n_quantile', 'agent_seed', 'Model', 'wall_time_rounded']).mean(numeric_only=True).reset_index(drop=False)\n",
    "# Keep every 10th row of each group, then only the runs with agent_seed == 1.\n",
    "df_training_time = df_training_time.groupby(['environment_name', 'environment_pretty', 'agent', 'agent_id', 'risk_measure', 'alpha', 'n_quantile', 'agent_seed']).apply(lambda x: x.iloc[::10]).reset_index(drop=True)\n",
    "df_training_time = df_training_time[(df_training_time['agent_seed'] == 1)]\n",
    "\n",
    "# Reference run for both axes: qrdqn with n_quantile == 50. Its last rounded wall time\n",
    "# defines 100 % on the x axis and the mean of its last 10 values defines 100 % on the y axis.\n",
    "dqn_time = df_training_time[(df_training_time['agent'] == 'qrdqn') & (df_training_time['n_quantile'] == 50)]['wall_time_rounded'].iloc[-1]\n",
    "dqn_best = df_training_time[(df_training_time['agent'] == 'qrdqn') & (df_training_time['n_quantile'] == 50)]['value'].iloc[-10:].mean()\n",
    "\n",
    "# NOTE(review): the step-based plot normalises with random_mean = -8 -- confirm which\n",
    "# baseline is intended.\n",
    "random_mean = -7\n",
    "\n",
    "fig, ax = plt.subplots(figsize=fig_size)\n",
    "sns.lineplot(\n",
    "    data=df_training_time,\n",
    "    x=100.0*df_training_time[\"wall_time_rounded\"]/dqn_time,\n",
    "    y=100.0*(df_training_time[\"value\"] - random_mean)/(dqn_best - random_mean),\n",
    "    hue=\"Model\",\n",
    "    alpha=0.7,\n",
    "    linewidth=3,\n",
    "    hue_order=agent_names,\n",
    "    palette=hue_kws['color'],\n",
    "    legend='brief',\n",
    "    ax=ax,\n",
    ")\n",
    "final_values = df_training_time.groupby('Model')['value'].last()\n",
    "\n",
    "# Short vertical tick per model at its largest normalised wall time, in the model's colour.\n",
    "for agent in final_values.index:\n",
    "    ax.vlines(x=100.0*df_training_time.loc[df_training_time['Model'] == agent, \"wall_time_rounded\"].max()/dqn_time,\n",
    "              ymin=-125,\n",
    "              ymax=-120,\n",
    "              colors=hue_kws['color'][agent_names.index(agent)],\n",
    "              linestyles='solid',\n",
    "              alpha=0.7,\n",
    "              linewidth=3)\n",
    "\n",
    "\n",
    "ax.set_title('')\n",
    "ax.spines['right'].set_visible(True)\n",
    "ax.spines['top'].set_visible(False)\n",
    "ax.spines['left'].set_visible(True)\n",
    "ax.spines['bottom'].set_visible(True)\n",
    "ax.set_xlabel('Normalized Time')\n",
    "ax.set_ylabel('Normalized Score')\n",
    "\n",
    "ax.yaxis.set_major_locator(ticker.MaxNLocator(nbins=10))\n",
    "ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=12))\n",
    "ax.xaxis.set_major_formatter(ticker.PercentFormatter())\n",
    "ax.yaxis.set_major_formatter(ticker.PercentFormatter())\n",
    "\n",
    "legend = ax.legend(loc='lower right', title='', frameon=False)\n",
    "plt.setp(legend.get_lines(), linewidth=3, alpha=0.5)\n",
    "\n",
    "fig.set_facecolor('white')\n",
    "fig.tight_layout()\n",
    "plt.savefig(path_base + '/performance_time.pdf', transparent=True)\n",
    "plt.savefig(path_base + '/performance_time.eps', format='eps', dpi=1200)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(8, 5))\n",
    "\n",
    "# Only the qrsrm runs with n_quantile == 200 are shown.\n",
    "df_theta_single = df_theta[(df_theta['agent'] == 'qrsrm') & (df_theta['n_quantile'] == 200)]\n",
    "theta_steps_k = df_theta_single[\"step\"]/1000.0\n",
    "\n",
    "# Thin, faint curve of the 'diff' column underneath a thick curve of the 'value' column;\n",
    "# only the second call contributes legend entries.\n",
    "shared_line_kws = dict(\n",
    "    data=df_theta_single,\n",
    "    x=theta_steps_k,\n",
    "    hue=\"Model\",\n",
    "    color=colors[0],\n",
    "    ax=ax,\n",
    ")\n",
    "sns.lineplot(y=\"diff\", alpha=0.3, linewidth=1, legend=None, **shared_line_kws)\n",
    "sns.lineplot(y=\"value\", alpha=1, linewidth=3, legend='brief', **shared_line_kws)\n",
    "\n",
    "ax.set_title('')\n",
    "# All spines are shown except the top one.\n",
    "for side, visible in (('right', True), ('top', False), ('left', True), ('bottom', True)):\n",
    "    ax.spines[side].set_visible(visible)\n",
    "ax.set_xlabel('Step (Thousands)')\n",
    "ax.set_ylabel('Mean Absolute Error')\n",
    "\n",
    "ax.yaxis.set_major_locator(ticker.MaxNLocator(nbins=10))\n",
    "ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=12))\n",
    "\n",
    "legend = ax.legend(loc='upper right', title='', frameon=False)\n",
    "plt.setp(legend.get_lines(), linewidth=3, alpha=0.5)\n",
    "\n",
    "fig.set_facecolor('white')\n",
    "fig.tight_layout()\n",
    "plt.savefig(path_base + '/performance_diff.pdf', transparent=True)\n",
    "plt.savefig(path_base + '/performance_diff.eps', format='eps', dpi=1200)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "CleanRL",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
