{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gym\n",
    "\n",
    "from stable_baselines3 import PPO\n",
    "from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise\n",
    "from stable_baselines3.common.evaluation import evaluate_policy\n",
    "import time\n",
    "import numpy as np\n",
    "\n",
    "from torch import nn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/steuber/anaconda3/envs/nnequiv-tf1/lib/python3.7/site-packages/gym/logger.py:34: UserWarning: \u001b[33mWARN: Environment '<class 'gym.envs.safety.acc.ACCEnv2'>' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior.\u001b[0m\n",
      "  warnings.warn(colorize(\"%s: %s\" % (\"WARN\", msg % args), \"yellow\"))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using cuda device\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n"
     ]
    }
   ],
   "source": [
    "env = gym.make(\"acc-variant-v1\")\n",
    "model = PPO(\"MlpPolicy\", env, verbose=1,policy_kwargs={\"activation_fn\":nn.ReLU})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/steuber/anaconda3/envs/nnequiv-tf1/lib/python3.7/site-packages/stable_baselines3/common/evaluation.py:69: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.\n",
      "  UserWarning,\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:-1247.70 +/- 682.92\n"
     ]
    }
   ],
   "source": [
    "# Use a separate environement for evaluation\n",
    "eval_env = gym.make('acc-variant-v1')\n",
    "\n",
    "# Random Agent, before training\n",
    "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=1000)\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------\n",
      "| rollout/           |           |\n",
      "|    ep_len_mean     | 27.9      |\n",
      "|    ep_rew_mean     | -1.46e+03 |\n",
      "| time/              |           |\n",
      "|    fps             | 491       |\n",
      "|    iterations      | 1         |\n",
      "|    time_elapsed    | 4         |\n",
      "|    total_timesteps | 2048      |\n",
      "----------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 38.5         |\n",
      "|    ep_rew_mean          | -1.51e+03    |\n",
      "| time/                   |              |\n",
      "|    fps                  | 380          |\n",
      "|    iterations           | 2            |\n",
      "|    time_elapsed         | 10           |\n",
      "|    total_timesteps      | 4096         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0072601833 |\n",
      "|    clip_fraction        | 0.071        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.42        |\n",
      "|    explained_variance   | 0.000427     |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.76e+05     |\n",
      "|    n_updates            | 10           |\n",
      "|    policy_gradient_loss | -0.00295     |\n",
      "|    std                  | 1            |\n",
      "|    value_loss           | 4.38e+05     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 51.8         |\n",
      "|    ep_rew_mean          | -1.5e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 355          |\n",
      "|    iterations           | 3            |\n",
      "|    time_elapsed         | 17           |\n",
      "|    total_timesteps      | 6144         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0056191483 |\n",
      "|    clip_fraction        | 0.0871       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.42        |\n",
      "|    explained_variance   | 0.0764       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7.41e+04     |\n",
      "|    n_updates            | 20           |\n",
      "|    policy_gradient_loss | -0.00642     |\n",
      "|    std                  | 1            |\n",
      "|    value_loss           | 1.71e+05     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 68.6         |\n",
      "|    ep_rew_mean          | -1.37e+03    |\n",
      "| time/                   |              |\n",
      "|    fps                  | 342          |\n",
      "|    iterations           | 4            |\n",
      "|    time_elapsed         | 23           |\n",
      "|    total_timesteps      | 8192         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0072147776 |\n",
      "|    clip_fraction        | 0.102        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.42        |\n",
      "|    explained_variance   | 0.443        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7.03e+04     |\n",
      "|    n_updates            | 30           |\n",
      "|    policy_gradient_loss | -0.00866     |\n",
      "|    std                  | 1            |\n",
      "|    value_loss           | 1.1e+05      |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 85.9        |\n",
      "|    ep_rew_mean          | -1.25e+03   |\n",
      "| time/                   |             |\n",
      "|    fps                  | 335         |\n",
      "|    iterations           | 5           |\n",
      "|    time_elapsed         | 30          |\n",
      "|    total_timesteps      | 10240       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008611243 |\n",
      "|    clip_fraction        | 0.0966      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.41       |\n",
      "|    explained_variance   | 0.368       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.6e+03     |\n",
      "|    n_updates            | 40          |\n",
      "|    policy_gradient_loss | -0.00471    |\n",
      "|    std                  | 0.982       |\n",
      "|    value_loss           | 4.88e+04    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<stable_baselines3.ppo.ppo.PPO at 0x7fa3079be1d0>"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.learn(total_timesteps=10000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:2728.13 +/- 2492.26\n"
     ]
    }
   ],
   "source": [
    "# Random Agent, before training\n",
    "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=1000)\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 178      |\n",
      "|    ep_rew_mean     | -312     |\n",
      "| time/              |          |\n",
      "|    fps             | 420      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 4        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 226         |\n",
      "|    ep_rew_mean          | 35.2        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 339         |\n",
      "|    iterations           | 2           |\n",
      "|    time_elapsed         | 12          |\n",
      "|    total_timesteps      | 4096        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006717869 |\n",
      "|    clip_fraction        | 0.0871      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.37       |\n",
      "|    explained_variance   | 0.567       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 5.71e+03    |\n",
      "|    n_updates            | 60          |\n",
      "|    policy_gradient_loss | -0.00504    |\n",
      "|    std                  | 0.945       |\n",
      "|    value_loss           | 2.31e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 206         |\n",
      "|    ep_rew_mean          | -281        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 321         |\n",
      "|    iterations           | 3           |\n",
      "|    time_elapsed         | 19          |\n",
      "|    total_timesteps      | 6144        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.021197053 |\n",
      "|    clip_fraction        | 0.127       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.33       |\n",
      "|    explained_variance   | 0.61        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 160         |\n",
      "|    n_updates            | 70          |\n",
      "|    policy_gradient_loss | -0.000941   |\n",
      "|    std                  | 0.892       |\n",
      "|    value_loss           | 7.69e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 218          |\n",
      "|    ep_rew_mean          | -134         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 307          |\n",
      "|    iterations           | 4            |\n",
      "|    time_elapsed         | 26           |\n",
      "|    total_timesteps      | 8192         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0063815485 |\n",
      "|    clip_fraction        | 0.0871       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.3         |\n",
      "|    explained_variance   | 0.384        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.29e+04     |\n",
      "|    n_updates            | 80           |\n",
      "|    policy_gradient_loss | -0.00376     |\n",
      "|    std                  | 0.885        |\n",
      "|    value_loss           | 6.89e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 230          |\n",
      "|    ep_rew_mean          | -21.8        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 301          |\n",
      "|    iterations           | 5            |\n",
      "|    time_elapsed         | 33           |\n",
      "|    total_timesteps      | 10240        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0042401487 |\n",
      "|    clip_fraction        | 0.0573       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.29        |\n",
      "|    explained_variance   | 0.674        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.2e+03      |\n",
      "|    n_updates            | 90           |\n",
      "|    policy_gradient_loss | 0.00118      |\n",
      "|    std                  | 0.875        |\n",
      "|    value_loss           | 1.15e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 236         |\n",
      "|    ep_rew_mean          | 62.2        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 312         |\n",
      "|    iterations           | 6           |\n",
      "|    time_elapsed         | 39          |\n",
      "|    total_timesteps      | 12288       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.020865686 |\n",
      "|    clip_fraction        | 0.158       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.26       |\n",
      "|    explained_variance   | 0.928       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 356         |\n",
      "|    n_updates            | 100         |\n",
      "|    policy_gradient_loss | -0.00187    |\n",
      "|    std                  | 0.837       |\n",
      "|    value_loss           | 1.61e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 236          |\n",
      "|    ep_rew_mean          | 59           |\n",
      "| time/                   |              |\n",
      "|    fps                  | 317          |\n",
      "|    iterations           | 7            |\n",
      "|    time_elapsed         | 45           |\n",
      "|    total_timesteps      | 14336        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0032823482 |\n",
      "|    clip_fraction        | 0.0632       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.24        |\n",
      "|    explained_variance   | 0.0801       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.18e+03     |\n",
      "|    n_updates            | 110          |\n",
      "|    policy_gradient_loss | 0.00229      |\n",
      "|    std                  | 0.832        |\n",
      "|    value_loss           | 1.26e+04     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 215        |\n",
      "|    ep_rew_mean          | -53.4      |\n",
      "| time/                   |            |\n",
      "|    fps                  | 313        |\n",
      "|    iterations           | 8          |\n",
      "|    time_elapsed         | 52         |\n",
      "|    total_timesteps      | 16384      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00870312 |\n",
      "|    clip_fraction        | 0.131      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.23      |\n",
      "|    explained_variance   | 0.893      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 6.22e+03   |\n",
      "|    n_updates            | 120        |\n",
      "|    policy_gradient_loss | -0.00334   |\n",
      "|    std                  | 0.82       |\n",
      "|    value_loss           | 3.54e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 183         |\n",
      "|    ep_rew_mean          | -216        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 311         |\n",
      "|    iterations           | 9           |\n",
      "|    time_elapsed         | 59          |\n",
      "|    total_timesteps      | 18432       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001461111 |\n",
      "|    clip_fraction        | 0.0751      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.22       |\n",
      "|    explained_variance   | 0.201       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.83e+04    |\n",
      "|    n_updates            | 130         |\n",
      "|    policy_gradient_loss | 0.000904    |\n",
      "|    std                  | 0.82        |\n",
      "|    value_loss           | 6.57e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 189          |\n",
      "|    ep_rew_mean          | -185         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 311          |\n",
      "|    iterations           | 10           |\n",
      "|    time_elapsed         | 65           |\n",
      "|    total_timesteps      | 20480        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0041481336 |\n",
      "|    clip_fraction        | 0.0682       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.22        |\n",
      "|    explained_variance   | 0.318        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.48e+04     |\n",
      "|    n_updates            | 140          |\n",
      "|    policy_gradient_loss | -0.00771     |\n",
      "|    std                  | 0.821        |\n",
      "|    value_loss           | 6.16e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 189          |\n",
      "|    ep_rew_mean          | -176         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 310          |\n",
      "|    iterations           | 11           |\n",
      "|    time_elapsed         | 72           |\n",
      "|    total_timesteps      | 22528        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0071474286 |\n",
      "|    clip_fraction        | 0.0704       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.22        |\n",
      "|    explained_variance   | 0.769        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.67e+04     |\n",
      "|    n_updates            | 150          |\n",
      "|    policy_gradient_loss | -0.00467     |\n",
      "|    std                  | 0.817        |\n",
      "|    value_loss           | 1.46e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 199         |\n",
      "|    ep_rew_mean          | -70         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 316         |\n",
      "|    iterations           | 12          |\n",
      "|    time_elapsed         | 77          |\n",
      "|    total_timesteps      | 24576       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010603473 |\n",
      "|    clip_fraction        | 0.0987      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.21       |\n",
      "|    explained_variance   | 0.924       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 317         |\n",
      "|    n_updates            | 160         |\n",
      "|    policy_gradient_loss | 0.0021      |\n",
      "|    std                  | 0.798       |\n",
      "|    value_loss           | 2.44e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 194          |\n",
      "|    ep_rew_mean          | -92.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 318          |\n",
      "|    iterations           | 13           |\n",
      "|    time_elapsed         | 83           |\n",
      "|    total_timesteps      | 26624        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0110498695 |\n",
      "|    clip_fraction        | 0.121        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.17        |\n",
      "|    explained_variance   | -1.66        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 578          |\n",
      "|    n_updates            | 170          |\n",
      "|    policy_gradient_loss | -0.00867     |\n",
      "|    std                  | 0.769        |\n",
      "|    value_loss           | 2.07e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 199         |\n",
      "|    ep_rew_mean          | -26.7       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 313         |\n",
      "|    iterations           | 14          |\n",
      "|    time_elapsed         | 91          |\n",
      "|    total_timesteps      | 28672       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003336512 |\n",
      "|    clip_fraction        | 0.05        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.16       |\n",
      "|    explained_variance   | 0.533       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.69e+04    |\n",
      "|    n_updates            | 180         |\n",
      "|    policy_gradient_loss | -0.00376    |\n",
      "|    std                  | 0.768       |\n",
      "|    value_loss           | 3e+04       |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 191         |\n",
      "|    ep_rew_mean          | -65.9       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 302         |\n",
      "|    iterations           | 15          |\n",
      "|    time_elapsed         | 101         |\n",
      "|    total_timesteps      | 30720       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009239398 |\n",
      "|    clip_fraction        | 0.0917      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.15       |\n",
      "|    explained_variance   | -0.203      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.29e+03    |\n",
      "|    n_updates            | 190         |\n",
      "|    policy_gradient_loss | -0.00569    |\n",
      "|    std                  | 0.766       |\n",
      "|    value_loss           | 1.18e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 196         |\n",
      "|    ep_rew_mean          | -19.7       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 292         |\n",
      "|    iterations           | 16          |\n",
      "|    time_elapsed         | 111         |\n",
      "|    total_timesteps      | 32768       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.012829138 |\n",
      "|    clip_fraction        | 0.0364      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.15       |\n",
      "|    explained_variance   | 0.293       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.15e+03    |\n",
      "|    n_updates            | 200         |\n",
      "|    policy_gradient_loss | 0.001       |\n",
      "|    std                  | 0.764       |\n",
      "|    value_loss           | 9.58e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 193         |\n",
      "|    ep_rew_mean          | -12.7       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 285         |\n",
      "|    iterations           | 17          |\n",
      "|    time_elapsed         | 121         |\n",
      "|    total_timesteps      | 34816       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.016161859 |\n",
      "|    clip_fraction        | 0.209       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.13       |\n",
      "|    explained_variance   | 0.766       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 5.67e+03    |\n",
      "|    n_updates            | 210         |\n",
      "|    policy_gradient_loss | -0.00251    |\n",
      "|    std                  | 0.742       |\n",
      "|    value_loss           | 3.08e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 198          |\n",
      "|    ep_rew_mean          | -7.75        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 287          |\n",
      "|    iterations           | 18           |\n",
      "|    time_elapsed         | 128          |\n",
      "|    total_timesteps      | 36864        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0029777966 |\n",
      "|    clip_fraction        | 0.0336       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.12        |\n",
      "|    explained_variance   | 0.0849       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.84e+04     |\n",
      "|    n_updates            | 220          |\n",
      "|    policy_gradient_loss | -0.00406     |\n",
      "|    std                  | 0.742        |\n",
      "|    value_loss           | 6.85e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 210         |\n",
      "|    ep_rew_mean          | 58.3        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 290         |\n",
      "|    iterations           | 19          |\n",
      "|    time_elapsed         | 133         |\n",
      "|    total_timesteps      | 38912       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007794897 |\n",
      "|    clip_fraction        | 0.0948      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.12       |\n",
      "|    explained_variance   | 0.537       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.94e+04    |\n",
      "|    n_updates            | 230         |\n",
      "|    policy_gradient_loss | -0.0057     |\n",
      "|    std                  | 0.742       |\n",
      "|    value_loss           | 3.15e+04    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 228          |\n",
      "|    ep_rew_mean          | 190          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 281          |\n",
      "|    iterations           | 20           |\n",
      "|    time_elapsed         | 145          |\n",
      "|    total_timesteps      | 40960        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0076172296 |\n",
      "|    clip_fraction        | 0.0974       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.12        |\n",
      "|    explained_variance   | 0.626        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.16e+03     |\n",
      "|    n_updates            | 240          |\n",
      "|    policy_gradient_loss | -0.00785     |\n",
      "|    std                  | 0.74         |\n",
      "|    value_loss           | 2.47e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 241          |\n",
      "|    ep_rew_mean          | 311          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 277          |\n",
      "|    iterations           | 21           |\n",
      "|    time_elapsed         | 154          |\n",
      "|    total_timesteps      | 43008        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0068155695 |\n",
      "|    clip_fraction        | 0.0658       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.09        |\n",
      "|    explained_variance   | -1.87        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 194          |\n",
      "|    n_updates            | 250          |\n",
      "|    policy_gradient_loss | -0.00295     |\n",
      "|    std                  | 0.702        |\n",
      "|    value_loss           | 729          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 242          |\n",
      "|    ep_rew_mean          | 348          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 277          |\n",
      "|    iterations           | 22           |\n",
      "|    time_elapsed         | 162          |\n",
      "|    total_timesteps      | 45056        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0098315105 |\n",
      "|    clip_fraction        | 0.0896       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.04        |\n",
      "|    explained_variance   | -0.514       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 130          |\n",
      "|    n_updates            | 260          |\n",
      "|    policy_gradient_loss | -0.00891     |\n",
      "|    std                  | 0.661        |\n",
      "|    value_loss           | 766          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 223         |\n",
      "|    ep_rew_mean          | 235         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 280         |\n",
      "|    iterations           | 23          |\n",
      "|    time_elapsed         | 167         |\n",
      "|    total_timesteps      | 47104       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009366837 |\n",
      "|    clip_fraction        | 0.0979      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.989      |\n",
      "|    explained_variance   | 0.765       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 159         |\n",
      "|    n_updates            | 270         |\n",
      "|    policy_gradient_loss | -0.00535    |\n",
      "|    std                  | 0.64        |\n",
      "|    value_loss           | 8.7e+03     |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 237         |\n",
      "|    ep_rew_mean          | 391         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 281         |\n",
      "|    iterations           | 24          |\n",
      "|    time_elapsed         | 174         |\n",
      "|    total_timesteps      | 49152       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007885162 |\n",
      "|    clip_fraction        | 0.0822      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.971      |\n",
      "|    explained_variance   | -0.0322     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.04e+04    |\n",
      "|    n_updates            | 280         |\n",
      "|    policy_gradient_loss | -0.0117     |\n",
      "|    std                  | 0.639       |\n",
      "|    value_loss           | 5.48e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 245          |\n",
      "|    ep_rew_mean          | 468          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 283          |\n",
      "|    iterations           | 25           |\n",
      "|    time_elapsed         | 180          |\n",
      "|    total_timesteps      | 51200        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0115402695 |\n",
      "|    clip_fraction        | 0.0915       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.968       |\n",
      "|    explained_variance   | -2.13        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 454          |\n",
      "|    n_updates            | 290          |\n",
      "|    policy_gradient_loss | -0.0108      |\n",
      "|    std                  | 0.635        |\n",
      "|    value_loss           | 1.53e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 248        |\n",
      "|    ep_rew_mean          | 493        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 283        |\n",
      "|    iterations           | 26         |\n",
      "|    time_elapsed         | 187        |\n",
      "|    total_timesteps      | 53248      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.04059755 |\n",
      "|    clip_fraction        | 0.0919     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.949     |\n",
      "|    explained_variance   | 0.583      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 804        |\n",
      "|    n_updates            | 300        |\n",
      "|    policy_gradient_loss | 0.00142    |\n",
      "|    std                  | 0.615      |\n",
      "|    value_loss           | 4.49e+03   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 252        |\n",
      "|    ep_rew_mean          | 535        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 281        |\n",
      "|    iterations           | 27         |\n",
      "|    time_elapsed         | 196        |\n",
      "|    total_timesteps      | 55296      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.13896358 |\n",
      "|    clip_fraction        | 0.0975     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.928     |\n",
      "|    explained_variance   | 0.734      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 6.26e+03   |\n",
      "|    n_updates            | 310        |\n",
      "|    policy_gradient_loss | 0.00651    |\n",
      "|    std                  | 0.609      |\n",
      "|    value_loss           | 6.73e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 260         |\n",
      "|    ep_rew_mean          | 621         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 280         |\n",
      "|    iterations           | 28          |\n",
      "|    time_elapsed         | 204         |\n",
      "|    total_timesteps      | 57344       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009863383 |\n",
      "|    clip_fraction        | 0.102       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.894      |\n",
      "|    explained_variance   | -0.693      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 288         |\n",
      "|    n_updates            | 320         |\n",
      "|    policy_gradient_loss | -0.00791    |\n",
      "|    std                  | 0.575       |\n",
      "|    value_loss           | 368         |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 259         |\n",
      "|    ep_rew_mean          | 647         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 281         |\n",
      "|    iterations           | 29          |\n",
      "|    time_elapsed         | 210         |\n",
      "|    total_timesteps      | 59392       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.013049594 |\n",
      "|    clip_fraction        | 0.0728      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.848      |\n",
      "|    explained_variance   | 0.949       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 46.9        |\n",
      "|    n_updates            | 330         |\n",
      "|    policy_gradient_loss | -0.00657    |\n",
      "|    std                  | 0.553       |\n",
      "|    value_loss           | 557         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 275          |\n",
      "|    ep_rew_mean          | 810          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 284          |\n",
      "|    iterations           | 30           |\n",
      "|    time_elapsed         | 215          |\n",
      "|    total_timesteps      | 61440        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0077112904 |\n",
      "|    clip_fraction        | 0.0711       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.811       |\n",
      "|    explained_variance   | 0.512        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 113          |\n",
      "|    n_updates            | 340          |\n",
      "|    policy_gradient_loss | -0.00889     |\n",
      "|    std                  | 0.534        |\n",
      "|    value_loss           | 6.38e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 288          |\n",
      "|    ep_rew_mean          | 934          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 287          |\n",
      "|    iterations           | 31           |\n",
      "|    time_elapsed         | 220          |\n",
      "|    total_timesteps      | 63488        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0027313062 |\n",
      "|    clip_fraction        | 0.0891       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.774       |\n",
      "|    explained_variance   | -2           |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 96.5         |\n",
      "|    n_updates            | 350          |\n",
      "|    policy_gradient_loss | -0.0033      |\n",
      "|    std                  | 0.516        |\n",
      "|    value_loss           | 566          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 297          |\n",
      "|    ep_rew_mean          | 1.04e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 290          |\n",
      "|    iterations           | 32           |\n",
      "|    time_elapsed         | 225          |\n",
      "|    total_timesteps      | 65536        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0068132747 |\n",
      "|    clip_fraction        | 0.059        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.731       |\n",
      "|    explained_variance   | 0.952        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 57.6         |\n",
      "|    n_updates            | 360          |\n",
      "|    policy_gradient_loss | -0.00846     |\n",
      "|    std                  | 0.491        |\n",
      "|    value_loss           | 1.24e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 308         |\n",
      "|    ep_rew_mean          | 1.17e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 293         |\n",
      "|    iterations           | 33          |\n",
      "|    time_elapsed         | 230         |\n",
      "|    total_timesteps      | 67584       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007869521 |\n",
      "|    clip_fraction        | 0.0368      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.693      |\n",
      "|    explained_variance   | 0.969       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 49.2        |\n",
      "|    n_updates            | 370         |\n",
      "|    policy_gradient_loss | -0.000611   |\n",
      "|    std                  | 0.475       |\n",
      "|    value_loss           | 1.5e+03     |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 320         |\n",
      "|    ep_rew_mean          | 1.32e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 295         |\n",
      "|    iterations           | 34          |\n",
      "|    time_elapsed         | 235         |\n",
      "|    total_timesteps      | 69632       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007048823 |\n",
      "|    clip_fraction        | 0.0492      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.655      |\n",
      "|    explained_variance   | -9.25       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 54.4        |\n",
      "|    n_updates            | 380         |\n",
      "|    policy_gradient_loss | -0.00627    |\n",
      "|    std                  | 0.454       |\n",
      "|    value_loss           | 1.28e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 310          |\n",
      "|    ep_rew_mean          | 1.24e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 298          |\n",
      "|    iterations           | 35           |\n",
      "|    time_elapsed         | 240          |\n",
      "|    total_timesteps      | 71680        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0066220877 |\n",
      "|    clip_fraction        | 0.0646       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.615       |\n",
      "|    explained_variance   | 0.705        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 27.9         |\n",
      "|    n_updates            | 390          |\n",
      "|    policy_gradient_loss | -0.00677     |\n",
      "|    std                  | 0.44         |\n",
      "|    value_loss           | 7.44e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 306          |\n",
      "|    ep_rew_mean          | 1.23e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 300          |\n",
      "|    iterations           | 36           |\n",
      "|    time_elapsed         | 245          |\n",
      "|    total_timesteps      | 73728        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0024084463 |\n",
      "|    clip_fraction        | 0.0335       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.593       |\n",
      "|    explained_variance   | 0.831        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 43           |\n",
      "|    n_updates            | 400          |\n",
      "|    policy_gradient_loss | 0.00428      |\n",
      "|    std                  | 0.435        |\n",
      "|    value_loss           | 1.62e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 314          |\n",
      "|    ep_rew_mean          | 1.34e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 300          |\n",
      "|    iterations           | 37           |\n",
      "|    time_elapsed         | 252          |\n",
      "|    total_timesteps      | 75776        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0065057045 |\n",
      "|    clip_fraction        | 0.0979       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.568       |\n",
      "|    explained_variance   | 0.547        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 24.9         |\n",
      "|    n_updates            | 410          |\n",
      "|    policy_gradient_loss | -0.00619     |\n",
      "|    std                  | 0.419        |\n",
      "|    value_loss           | 3.08e+03     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 316          |\n",
      "|    ep_rew_mean          | 1.36e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 299          |\n",
      "|    iterations           | 38           |\n",
      "|    time_elapsed         | 259          |\n",
      "|    total_timesteps      | 77824        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0082638245 |\n",
      "|    clip_fraction        | 0.048        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.526       |\n",
      "|    explained_variance   | -0.0604      |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 39.3         |\n",
      "|    n_updates            | 420          |\n",
      "|    policy_gradient_loss | -0.00923     |\n",
      "|    std                  | 0.399        |\n",
      "|    value_loss           | 87.9         |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 327        |\n",
      "|    ep_rew_mean          | 1.5e+03    |\n",
      "| time/                   |            |\n",
      "|    fps                  | 298        |\n",
      "|    iterations           | 39         |\n",
      "|    time_elapsed         | 267        |\n",
      "|    total_timesteps      | 79872      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00506144 |\n",
      "|    clip_fraction        | 0.0462     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.492     |\n",
      "|    explained_variance   | 0.781      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.25e+03   |\n",
      "|    n_updates            | 430        |\n",
      "|    policy_gradient_loss | 0.00205    |\n",
      "|    std                  | 0.392      |\n",
      "|    value_loss           | 8.57e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 325         |\n",
      "|    ep_rew_mean          | 1.51e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 297         |\n",
      "|    iterations           | 40          |\n",
      "|    time_elapsed         | 275         |\n",
      "|    total_timesteps      | 81920       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005857582 |\n",
      "|    clip_fraction        | 0.11        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.467      |\n",
      "|    explained_variance   | -19         |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 49.5        |\n",
      "|    n_updates            | 440         |\n",
      "|    policy_gradient_loss | 0.00608     |\n",
      "|    std                  | 0.38        |\n",
      "|    value_loss           | 1.93e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 324          |\n",
      "|    ep_rew_mean          | 1.54e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 296          |\n",
      "|    iterations           | 41           |\n",
      "|    time_elapsed         | 283          |\n",
      "|    total_timesteps      | 83968        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012198628 |\n",
      "|    clip_fraction        | 0.0345       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.451       |\n",
      "|    explained_variance   | 0.273        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 545          |\n",
      "|    n_updates            | 450          |\n",
      "|    policy_gradient_loss | -0.00179     |\n",
      "|    std                  | 0.379        |\n",
      "|    value_loss           | 1.01e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 325          |\n",
      "|    ep_rew_mean          | 1.59e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 295          |\n",
      "|    iterations           | 42           |\n",
      "|    time_elapsed         | 291          |\n",
      "|    total_timesteps      | 86016        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0060632345 |\n",
      "|    clip_fraction        | 0.0828       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.442       |\n",
      "|    explained_variance   | 0.763        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7.22e+03     |\n",
      "|    n_updates            | 460          |\n",
      "|    policy_gradient_loss | -0.000896    |\n",
      "|    std                  | 0.373        |\n",
      "|    value_loss           | 4.72e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 312         |\n",
      "|    ep_rew_mean          | 1.49e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 295         |\n",
      "|    iterations           | 43          |\n",
      "|    time_elapsed         | 297         |\n",
      "|    total_timesteps      | 88064       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.015721854 |\n",
      "|    clip_fraction        | 0.0632      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.42       |\n",
      "|    explained_variance   | -5.85       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 49.1        |\n",
      "|    n_updates            | 470         |\n",
      "|    policy_gradient_loss | 0.00236     |\n",
      "|    std                  | 0.363       |\n",
      "|    value_loss           | 684         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 308          |\n",
      "|    ep_rew_mean          | 1.48e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 296          |\n",
      "|    iterations           | 44           |\n",
      "|    time_elapsed         | 304          |\n",
      "|    total_timesteps      | 90112        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0051364857 |\n",
      "|    clip_fraction        | 0.0558       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.391       |\n",
      "|    explained_variance   | 0.914        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 22.6         |\n",
      "|    n_updates            | 480          |\n",
      "|    policy_gradient_loss | -0.00222     |\n",
      "|    std                  | 0.353        |\n",
      "|    value_loss           | 4.43e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 282         |\n",
      "|    ep_rew_mean          | 1.27e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 296         |\n",
      "|    iterations           | 45          |\n",
      "|    time_elapsed         | 311         |\n",
      "|    total_timesteps      | 92160       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011028011 |\n",
      "|    clip_fraction        | 0.142       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.357      |\n",
      "|    explained_variance   | 0.971       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 25.3        |\n",
      "|    n_updates            | 490         |\n",
      "|    policy_gradient_loss | 0.023       |\n",
      "|    std                  | 0.339       |\n",
      "|    value_loss           | 190         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 286         |\n",
      "|    ep_rew_mean          | 1.34e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 297         |\n",
      "|    iterations           | 46          |\n",
      "|    time_elapsed         | 316         |\n",
      "|    total_timesteps      | 94208       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004631918 |\n",
      "|    clip_fraction        | 0.0445      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.337      |\n",
      "|    explained_variance   | 0.523       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.23e+04    |\n",
      "|    n_updates            | 500         |\n",
      "|    policy_gradient_loss | -0.0105     |\n",
      "|    std                  | 0.339       |\n",
      "|    value_loss           | 5.93e+04    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 286         |\n",
      "|    ep_rew_mean          | 1.36e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 299         |\n",
      "|    iterations           | 47          |\n",
      "|    time_elapsed         | 321         |\n",
      "|    total_timesteps      | 96256       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009290674 |\n",
      "|    clip_fraction        | 0.113       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.336      |\n",
      "|    explained_variance   | -1.63       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 541         |\n",
      "|    n_updates            | 510         |\n",
      "|    policy_gradient_loss | -0.0135     |\n",
      "|    std                  | 0.338       |\n",
      "|    value_loss           | 2.88e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 286         |\n",
      "|    ep_rew_mean          | 1.38e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 301         |\n",
      "|    iterations           | 48          |\n",
      "|    time_elapsed         | 326         |\n",
      "|    total_timesteps      | 98304       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011846319 |\n",
      "|    clip_fraction        | 0.14        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.327      |\n",
      "|    explained_variance   | -0.82       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 163         |\n",
      "|    n_updates            | 520         |\n",
      "|    policy_gradient_loss | -0.0139     |\n",
      "|    std                  | 0.334       |\n",
      "|    value_loss           | 201         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 298          |\n",
      "|    ep_rew_mean          | 1.53e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 302          |\n",
      "|    iterations           | 49           |\n",
      "|    time_elapsed         | 331          |\n",
      "|    total_timesteps      | 100352       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0027501304 |\n",
      "|    clip_fraction        | 0.0373       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.311       |\n",
      "|    explained_variance   | -0.279       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 47.7         |\n",
      "|    n_updates            | 530          |\n",
      "|    policy_gradient_loss | -0.00497     |\n",
      "|    std                  | 0.327        |\n",
      "|    value_loss           | 83           |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 306          |\n",
      "|    ep_rew_mean          | 1.65e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 303          |\n",
      "|    iterations           | 50           |\n",
      "|    time_elapsed         | 337          |\n",
      "|    total_timesteps      | 102400       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0043533873 |\n",
      "|    clip_fraction        | 0.0336       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.28        |\n",
      "|    explained_variance   | 0.155        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 28.4         |\n",
      "|    n_updates            | 540          |\n",
      "|    policy_gradient_loss | -0.00642     |\n",
      "|    std                  | 0.313        |\n",
      "|    value_loss           | 64.9         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 310         |\n",
      "|    ep_rew_mean          | 1.72e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 302         |\n",
      "|    iterations           | 51          |\n",
      "|    time_elapsed         | 345         |\n",
      "|    total_timesteps      | 104448      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.023731964 |\n",
      "|    clip_fraction        | 0.101       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.248      |\n",
      "|    explained_variance   | 0.849       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 39.3        |\n",
      "|    n_updates            | 550         |\n",
      "|    policy_gradient_loss | -0.00158    |\n",
      "|    std                  | 0.307       |\n",
      "|    value_loss           | 5.8e+03     |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 310          |\n",
      "|    ep_rew_mean          | 1.74e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 301          |\n",
      "|    iterations           | 52           |\n",
      "|    time_elapsed         | 352          |\n",
      "|    total_timesteps      | 106496       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0069928262 |\n",
      "|    clip_fraction        | 0.0939       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.224       |\n",
      "|    explained_variance   | -0.224       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 32.1         |\n",
      "|    n_updates            | 560          |\n",
      "|    policy_gradient_loss | -0.0117      |\n",
      "|    std                  | 0.3          |\n",
      "|    value_loss           | 51.9         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 262         |\n",
      "|    ep_rew_mean          | 1.34e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 300         |\n",
      "|    iterations           | 53          |\n",
      "|    time_elapsed         | 361         |\n",
      "|    total_timesteps      | 108544      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.016300114 |\n",
      "|    clip_fraction        | 0.113       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.203      |\n",
      "|    explained_variance   | -0.0782     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 19.4        |\n",
      "|    n_updates            | 570         |\n",
      "|    policy_gradient_loss | -0.0102     |\n",
      "|    std                  | 0.293       |\n",
      "|    value_loss           | 65.9        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 241         |\n",
      "|    ep_rew_mean          | 1.15e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 299         |\n",
      "|    iterations           | 54          |\n",
      "|    time_elapsed         | 369         |\n",
      "|    total_timesteps      | 110592      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004436843 |\n",
      "|    clip_fraction        | 0.0505      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.189      |\n",
      "|    explained_variance   | 0.25        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.74e+04    |\n",
      "|    n_updates            | 580         |\n",
      "|    policy_gradient_loss | -0.00593    |\n",
      "|    std                  | 0.292       |\n",
      "|    value_loss           | 1.06e+05    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 239          |\n",
      "|    ep_rew_mean          | 1.14e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 297          |\n",
      "|    iterations           | 55           |\n",
      "|    time_elapsed         | 379          |\n",
      "|    total_timesteps      | 112640       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0056345337 |\n",
      "|    clip_fraction        | 0.072        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.189       |\n",
      "|    explained_variance   | 0.576        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.87e+04     |\n",
      "|    n_updates            | 590          |\n",
      "|    policy_gradient_loss | -0.0104      |\n",
      "|    std                  | 0.292        |\n",
      "|    value_loss           | 4.62e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 243         |\n",
      "|    ep_rew_mean          | 1.18e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 296         |\n",
      "|    iterations           | 56          |\n",
      "|    time_elapsed         | 386         |\n",
      "|    total_timesteps      | 114688      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.012044152 |\n",
      "|    clip_fraction        | 0.118       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.189      |\n",
      "|    explained_variance   | 0.721       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.05e+04    |\n",
      "|    n_updates            | 600         |\n",
      "|    policy_gradient_loss | -0.0164     |\n",
      "|    std                  | 0.292       |\n",
      "|    value_loss           | 2.58e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 254          |\n",
      "|    ep_rew_mean          | 1.3e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 295          |\n",
      "|    iterations           | 57           |\n",
      "|    time_elapsed         | 394          |\n",
      "|    total_timesteps      | 116736       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0146824615 |\n",
      "|    clip_fraction        | 0.138        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.187       |\n",
      "|    explained_variance   | 0.376        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 686          |\n",
      "|    n_updates            | 610          |\n",
      "|    policy_gradient_loss | -0.0135      |\n",
      "|    std                  | 0.291        |\n",
      "|    value_loss           | 7.36e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 266          |\n",
      "|    ep_rew_mean          | 1.45e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 294          |\n",
      "|    iterations           | 58           |\n",
      "|    time_elapsed         | 403          |\n",
      "|    total_timesteps      | 118784       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0069474922 |\n",
      "|    clip_fraction        | 0.0959       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.181       |\n",
      "|    explained_variance   | 0.891        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 282          |\n",
      "|    n_updates            | 620          |\n",
      "|    policy_gradient_loss | -0.00486     |\n",
      "|    std                  | 0.289        |\n",
      "|    value_loss           | 891          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 270         |\n",
      "|    ep_rew_mean          | 1.49e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 293         |\n",
      "|    iterations           | 59          |\n",
      "|    time_elapsed         | 411         |\n",
      "|    total_timesteps      | 120832      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.035373718 |\n",
      "|    clip_fraction        | 0.0833      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.168      |\n",
      "|    explained_variance   | 0.78        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 340         |\n",
      "|    n_updates            | 630         |\n",
      "|    policy_gradient_loss | 0.00456     |\n",
      "|    std                  | 0.284       |\n",
      "|    value_loss           | 5.78e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 262         |\n",
      "|    ep_rew_mean          | 1.42e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 291         |\n",
      "|    iterations           | 60          |\n",
      "|    time_elapsed         | 421         |\n",
      "|    total_timesteps      | 122880      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005145364 |\n",
      "|    clip_fraction        | 0.0759      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.151      |\n",
      "|    explained_variance   | -19.6       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 45.1        |\n",
      "|    n_updates            | 640         |\n",
      "|    policy_gradient_loss | 0.00115     |\n",
      "|    std                  | 0.278       |\n",
      "|    value_loss           | 4.43e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 246          |\n",
      "|    ep_rew_mean          | 1.23e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 290          |\n",
      "|    iterations           | 61           |\n",
      "|    time_elapsed         | 430          |\n",
      "|    total_timesteps      | 124928       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011806218 |\n",
      "|    clip_fraction        | 0.00659      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.135       |\n",
      "|    explained_variance   | 0.568        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.02e+04     |\n",
      "|    n_updates            | 650          |\n",
      "|    policy_gradient_loss | -0.00143     |\n",
      "|    std                  | 0.275        |\n",
      "|    value_loss           | 9.63e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 258        |\n",
      "|    ep_rew_mean          | 1.38e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 288        |\n",
      "|    iterations           | 62         |\n",
      "|    time_elapsed         | 439        |\n",
      "|    total_timesteps      | 126976     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.34672213 |\n",
      "|    clip_fraction        | 0.0621     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.123     |\n",
      "|    explained_variance   | 0.83       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 24.8       |\n",
      "|    n_updates            | 660        |\n",
      "|    policy_gradient_loss | 0.00445    |\n",
      "|    std                  | 0.271      |\n",
      "|    value_loss           | 6.83e+03   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 258          |\n",
      "|    ep_rew_mean          | 1.39e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 288          |\n",
      "|    iterations           | 63           |\n",
      "|    time_elapsed         | 447          |\n",
      "|    total_timesteps      | 129024       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0078024343 |\n",
      "|    clip_fraction        | 0.0519       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.0795      |\n",
      "|    explained_variance   | -8.2         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 22.3         |\n",
      "|    n_updates            | 670          |\n",
      "|    policy_gradient_loss | -0.00544     |\n",
      "|    std                  | 0.254        |\n",
      "|    value_loss           | 307          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 254         |\n",
      "|    ep_rew_mean          | 1.35e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 287         |\n",
      "|    iterations           | 64          |\n",
      "|    time_elapsed         | 455         |\n",
      "|    total_timesteps      | 131072      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004413543 |\n",
      "|    clip_fraction        | 0.0599      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.02       |\n",
      "|    explained_variance   | 0.197       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 18.1        |\n",
      "|    n_updates            | 680         |\n",
      "|    policy_gradient_loss | -0.00517    |\n",
      "|    std                  | 0.241       |\n",
      "|    value_loss           | 38.6        |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 257        |\n",
      "|    ep_rew_mean          | 1.39e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 286        |\n",
      "|    iterations           | 65         |\n",
      "|    time_elapsed         | 464        |\n",
      "|    total_timesteps      | 133120     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.24924818 |\n",
      "|    clip_fraction        | 0.172      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.0276     |\n",
      "|    explained_variance   | 0.9        |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 2.7e+04    |\n",
      "|    n_updates            | 690        |\n",
      "|    policy_gradient_loss | -0.000208  |\n",
      "|    std                  | 0.232      |\n",
      "|    value_loss           | 2.43e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 271         |\n",
      "|    ep_rew_mean          | 1.53e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 286         |\n",
      "|    iterations           | 66          |\n",
      "|    time_elapsed         | 472         |\n",
      "|    total_timesteps      | 135168      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001583004 |\n",
      "|    clip_fraction        | 0.0171      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0442      |\n",
      "|    explained_variance   | 0.591       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.01e+03    |\n",
      "|    n_updates            | 700         |\n",
      "|    policy_gradient_loss | -0.000906   |\n",
      "|    std                  | 0.231       |\n",
      "|    value_loss           | 9.6e+03     |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 254          |\n",
      "|    ep_rew_mean          | 1.38e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 285          |\n",
      "|    iterations           | 67           |\n",
      "|    time_elapsed         | 480          |\n",
      "|    total_timesteps      | 137216       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0019693251 |\n",
      "|    clip_fraction        | 0.00732      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.0447       |\n",
      "|    explained_variance   | 0.792        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.31e+04     |\n",
      "|    n_updates            | 710          |\n",
      "|    policy_gradient_loss | -0.00248     |\n",
      "|    std                  | 0.231        |\n",
      "|    value_loss           | 1.47e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 225         |\n",
      "|    ep_rew_mean          | 1.09e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 285         |\n",
      "|    iterations           | 68          |\n",
      "|    time_elapsed         | 487         |\n",
      "|    total_timesteps      | 139264      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001068679 |\n",
      "|    clip_fraction        | 0.00942     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0451      |\n",
      "|    explained_variance   | 0.861       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.4e+04     |\n",
      "|    n_updates            | 720         |\n",
      "|    policy_gradient_loss | -0.00209    |\n",
      "|    std                  | 0.231       |\n",
      "|    value_loss           | 4.1e+04     |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 202          |\n",
      "|    ep_rew_mean          | 845          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 284          |\n",
      "|    iterations           | 69           |\n",
      "|    time_elapsed         | 495          |\n",
      "|    total_timesteps      | 141312       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012961377 |\n",
      "|    clip_fraction        | 0.0142       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.0457       |\n",
      "|    explained_variance   | 0.933        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 398          |\n",
      "|    n_updates            | 730          |\n",
      "|    policy_gradient_loss | 0.000899     |\n",
      "|    std                  | 0.231        |\n",
      "|    value_loss           | 1.25e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 194         |\n",
      "|    ep_rew_mean          | 792         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 284         |\n",
      "|    iterations           | 70          |\n",
      "|    time_elapsed         | 503         |\n",
      "|    total_timesteps      | 143360      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005403825 |\n",
      "|    clip_fraction        | 0.031       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0466      |\n",
      "|    explained_variance   | 0.792       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.74e+03    |\n",
      "|    n_updates            | 740         |\n",
      "|    policy_gradient_loss | 0.000125    |\n",
      "|    std                  | 0.231       |\n",
      "|    value_loss           | 1.72e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 187          |\n",
      "|    ep_rew_mean          | 740          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 284          |\n",
      "|    iterations           | 71           |\n",
      "|    time_elapsed         | 510          |\n",
      "|    total_timesteps      | 145408       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0021714487 |\n",
      "|    clip_fraction        | 0.0106       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.047        |\n",
      "|    explained_variance   | 0.859        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.11e+03     |\n",
      "|    n_updates            | 750          |\n",
      "|    policy_gradient_loss | -0.000503    |\n",
      "|    std                  | 0.231        |\n",
      "|    value_loss           | 1.62e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 167         |\n",
      "|    ep_rew_mean          | 518         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 284         |\n",
      "|    iterations           | 72          |\n",
      "|    time_elapsed         | 518         |\n",
      "|    total_timesteps      | 147456      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004508338 |\n",
      "|    clip_fraction        | 0.0233      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0474      |\n",
      "|    explained_variance   | 0.482       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.95e+03    |\n",
      "|    n_updates            | 760         |\n",
      "|    policy_gradient_loss | -0.0017     |\n",
      "|    std                  | 0.231       |\n",
      "|    value_loss           | 1.91e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 159         |\n",
      "|    ep_rew_mean          | 446         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 283         |\n",
      "|    iterations           | 73          |\n",
      "|    time_elapsed         | 526         |\n",
      "|    total_timesteps      | 149504      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010519819 |\n",
      "|    clip_fraction        | 0.0352      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0495      |\n",
      "|    explained_variance   | 0.897       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.13e+03    |\n",
      "|    n_updates            | 770         |\n",
      "|    policy_gradient_loss | -0.00104    |\n",
      "|    std                  | 0.23        |\n",
      "|    value_loss           | 7.81e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 162         |\n",
      "|    ep_rew_mean          | 493         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 281         |\n",
      "|    iterations           | 74          |\n",
      "|    time_elapsed         | 538         |\n",
      "|    total_timesteps      | 151552      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.016357645 |\n",
      "|    clip_fraction        | 0.0935      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0516      |\n",
      "|    explained_variance   | 0.912       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.38e+03    |\n",
      "|    n_updates            | 780         |\n",
      "|    policy_gradient_loss | 0.000167    |\n",
      "|    std                  | 0.23        |\n",
      "|    value_loss           | 1.29e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 166         |\n",
      "|    ep_rew_mean          | 519         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 278         |\n",
      "|    iterations           | 75          |\n",
      "|    time_elapsed         | 550         |\n",
      "|    total_timesteps      | 153600      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011597884 |\n",
      "|    clip_fraction        | 0.14        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0516      |\n",
      "|    explained_variance   | 0.944       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 260         |\n",
      "|    n_updates            | 790         |\n",
      "|    policy_gradient_loss | -0.000339   |\n",
      "|    std                  | 0.23        |\n",
      "|    value_loss           | 2.73e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 178         |\n",
      "|    ep_rew_mean          | 664         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 276         |\n",
      "|    iterations           | 76          |\n",
      "|    time_elapsed         | 562         |\n",
      "|    total_timesteps      | 155648      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003837773 |\n",
      "|    clip_fraction        | 0.0524      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0516      |\n",
      "|    explained_variance   | 0.808       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 9.87e+03    |\n",
      "|    n_updates            | 800         |\n",
      "|    policy_gradient_loss | 0.00144     |\n",
      "|    std                  | 0.23        |\n",
      "|    value_loss           | 2.89e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 190         |\n",
      "|    ep_rew_mean          | 819         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 274         |\n",
      "|    iterations           | 77          |\n",
      "|    time_elapsed         | 574         |\n",
      "|    total_timesteps      | 157696      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007105803 |\n",
      "|    clip_fraction        | 0.0331      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0531      |\n",
      "|    explained_variance   | 0.924       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.32e+03    |\n",
      "|    n_updates            | 810         |\n",
      "|    policy_gradient_loss | 0.0159      |\n",
      "|    std                  | 0.229       |\n",
      "|    value_loss           | 6.58e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 167         |\n",
      "|    ep_rew_mean          | 557         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 273         |\n",
      "|    iterations           | 78          |\n",
      "|    time_elapsed         | 583         |\n",
      "|    total_timesteps      | 159744      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006574299 |\n",
      "|    clip_fraction        | 0.0587      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0553      |\n",
      "|    explained_variance   | 0.829       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.36e+03    |\n",
      "|    n_updates            | 820         |\n",
      "|    policy_gradient_loss | -0.00134    |\n",
      "|    std                  | 0.229       |\n",
      "|    value_loss           | 9.27e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 162         |\n",
      "|    ep_rew_mean          | 468         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 273         |\n",
      "|    iterations           | 79          |\n",
      "|    time_elapsed         | 591         |\n",
      "|    total_timesteps      | 161792      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006191591 |\n",
      "|    clip_fraction        | 0.0491      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0564      |\n",
      "|    explained_variance   | 0.545       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 8.31e+04    |\n",
      "|    n_updates            | 830         |\n",
      "|    policy_gradient_loss | -0.00282    |\n",
      "|    std                  | 0.229       |\n",
      "|    value_loss           | 1.34e+05    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 165         |\n",
      "|    ep_rew_mean          | 513         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 272         |\n",
      "|    iterations           | 80          |\n",
      "|    time_elapsed         | 600         |\n",
      "|    total_timesteps      | 163840      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007903238 |\n",
      "|    clip_fraction        | 0.0806      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0565      |\n",
      "|    explained_variance   | 0.317       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.17e+04    |\n",
      "|    n_updates            | 840         |\n",
      "|    policy_gradient_loss | -0.00808    |\n",
      "|    std                  | 0.229       |\n",
      "|    value_loss           | 7.76e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 174         |\n",
      "|    ep_rew_mean          | 636         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 272         |\n",
      "|    iterations           | 81          |\n",
      "|    time_elapsed         | 608         |\n",
      "|    total_timesteps      | 165888      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.030866459 |\n",
      "|    clip_fraction        | 0.24        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0581      |\n",
      "|    explained_variance   | 0.821       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 313         |\n",
      "|    n_updates            | 850         |\n",
      "|    policy_gradient_loss | -0.00101    |\n",
      "|    std                  | 0.228       |\n",
      "|    value_loss           | 5.92e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 186         |\n",
      "|    ep_rew_mean          | 759         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 272         |\n",
      "|    iterations           | 82          |\n",
      "|    time_elapsed         | 616         |\n",
      "|    total_timesteps      | 167936      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010143675 |\n",
      "|    clip_fraction        | 0.0629      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0597      |\n",
      "|    explained_variance   | 0.731       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.77e+04    |\n",
      "|    n_updates            | 860         |\n",
      "|    policy_gradient_loss | -0.0029     |\n",
      "|    std                  | 0.228       |\n",
      "|    value_loss           | 1.75e+04    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 190         |\n",
      "|    ep_rew_mean          | 777         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 271         |\n",
      "|    iterations           | 83          |\n",
      "|    time_elapsed         | 624         |\n",
      "|    total_timesteps      | 169984      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.019315233 |\n",
      "|    clip_fraction        | 0.102       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0688      |\n",
      "|    explained_variance   | 0.889       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 820         |\n",
      "|    n_updates            | 870         |\n",
      "|    policy_gradient_loss | -0.00344    |\n",
      "|    std                  | 0.224       |\n",
      "|    value_loss           | 706         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 190        |\n",
      "|    ep_rew_mean          | 754        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 271        |\n",
      "|    iterations           | 84         |\n",
      "|    time_elapsed         | 632        |\n",
      "|    total_timesteps      | 172032     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.07108556 |\n",
      "|    clip_fraction        | 0.105      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.0957     |\n",
      "|    explained_variance   | 0.915      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 24.2       |\n",
      "|    n_updates            | 880        |\n",
      "|    policy_gradient_loss | 0.00037    |\n",
      "|    std                  | 0.216      |\n",
      "|    value_loss           | 1.42e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 185         |\n",
      "|    ep_rew_mean          | 692         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 271         |\n",
      "|    iterations           | 85          |\n",
      "|    time_elapsed         | 640         |\n",
      "|    total_timesteps      | 174080      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011168043 |\n",
      "|    clip_fraction        | 0.0558      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.114       |\n",
      "|    explained_variance   | 0.875       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.52e+03    |\n",
      "|    n_updates            | 890         |\n",
      "|    policy_gradient_loss | 0.00142     |\n",
      "|    std                  | 0.216       |\n",
      "|    value_loss           | 2.33e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 189         |\n",
      "|    ep_rew_mean          | 729         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 271         |\n",
      "|    iterations           | 86          |\n",
      "|    time_elapsed         | 648         |\n",
      "|    total_timesteps      | 176128      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.042939566 |\n",
      "|    clip_fraction        | 0.18        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.115       |\n",
      "|    explained_variance   | 0.915       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.35e+03    |\n",
      "|    n_updates            | 900         |\n",
      "|    policy_gradient_loss | -0.000127   |\n",
      "|    std                  | 0.215       |\n",
      "|    value_loss           | 9.14e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 197         |\n",
      "|    ep_rew_mean          | 816         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 271         |\n",
      "|    iterations           | 87          |\n",
      "|    time_elapsed         | 656         |\n",
      "|    total_timesteps      | 178176      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.053298574 |\n",
      "|    clip_fraction        | 0.101       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.121       |\n",
      "|    explained_variance   | 0.975       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.26e+03    |\n",
      "|    n_updates            | 910         |\n",
      "|    policy_gradient_loss | 0.000479    |\n",
      "|    std                  | 0.213       |\n",
      "|    value_loss           | 1.85e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 213          |\n",
      "|    ep_rew_mean          | 997          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 270          |\n",
      "|    iterations           | 88           |\n",
      "|    time_elapsed         | 665          |\n",
      "|    total_timesteps      | 180224       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0056643737 |\n",
      "|    clip_fraction        | 0.0915       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.145        |\n",
      "|    explained_variance   | 0.199        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 23           |\n",
      "|    n_updates            | 920          |\n",
      "|    policy_gradient_loss | 0.00736      |\n",
      "|    std                  | 0.206        |\n",
      "|    value_loss           | 4.52e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 229        |\n",
      "|    ep_rew_mean          | 1.16e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 269        |\n",
      "|    iterations           | 89         |\n",
      "|    time_elapsed         | 675        |\n",
      "|    total_timesteps      | 182272     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.12427095 |\n",
      "|    clip_fraction        | 0.0884     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.181      |\n",
      "|    explained_variance   | -2.2       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 12.7       |\n",
      "|    n_updates            | 930        |\n",
      "|    policy_gradient_loss | -0.00611   |\n",
      "|    std                  | 0.197      |\n",
      "|    value_loss           | 231        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 248         |\n",
      "|    ep_rew_mean          | 1.37e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 269         |\n",
      "|    iterations           | 90          |\n",
      "|    time_elapsed         | 684         |\n",
      "|    total_timesteps      | 184320      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.034238465 |\n",
      "|    clip_fraction        | 0.0248      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.223       |\n",
      "|    explained_variance   | 0.737       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 8.95e+03    |\n",
      "|    n_updates            | 940         |\n",
      "|    policy_gradient_loss | 0.0176      |\n",
      "|    std                  | 0.191       |\n",
      "|    value_loss           | 7.78e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 262          |\n",
      "|    ep_rew_mean          | 1.54e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 268          |\n",
      "|    iterations           | 91           |\n",
      "|    time_elapsed         | 693          |\n",
      "|    total_timesteps      | 186368       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0070182877 |\n",
      "|    clip_fraction        | 0.0336       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.245        |\n",
      "|    explained_variance   | 0.9          |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 84.9         |\n",
      "|    n_updates            | 950          |\n",
      "|    policy_gradient_loss | -0.00379     |\n",
      "|    std                  | 0.188        |\n",
      "|    value_loss           | 3.29e+03     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 268         |\n",
      "|    ep_rew_mean          | 1.61e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 268         |\n",
      "|    iterations           | 92          |\n",
      "|    time_elapsed         | 701         |\n",
      "|    total_timesteps      | 188416      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011685116 |\n",
      "|    clip_fraction        | 0.0854      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.265       |\n",
      "|    explained_variance   | 0.512       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 31.7        |\n",
      "|    n_updates            | 960         |\n",
      "|    policy_gradient_loss | -0.00447    |\n",
      "|    std                  | 0.183       |\n",
      "|    value_loss           | 68.1        |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 275       |\n",
      "|    ep_rew_mean          | 1.72e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 268       |\n",
      "|    iterations           | 93        |\n",
      "|    time_elapsed         | 708       |\n",
      "|    total_timesteps      | 190464    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.0294869 |\n",
      "|    clip_fraction        | 0.0834    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.292     |\n",
      "|    explained_variance   | 0.916     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.1e+03   |\n",
      "|    n_updates            | 970       |\n",
      "|    policy_gradient_loss | 0.0062    |\n",
      "|    std                  | 0.178     |\n",
      "|    value_loss           | 4.45e+03  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<stable_baselines3.ppo.ppo.PPO at 0x7fa3079be1d0>"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.learn(total_timesteps=190000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:2673.64 +/- 2473.24\n"
     ]
    }
   ],
   "source": [
    "# Evaluate the trained agent (this cell runs after model.learn)\n",
    "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=1000)\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save(\"ppo_acc_200000_steps-improve\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting separated by  26.767012609748193  meters moving at  -7.469366897857429  m/s.\n",
      "Starting separated by  27.195634190312834  meters moving at  -7.248007366886357  m/s.\n",
      "Starting separated by  70.99185762975823  meters moving at  -15.469777425086132  m/s.\n",
      "Starting separated by  57.5538879661524  meters moving at  4.491632540842261  m/s.\n",
      "Starting separated by  60.20966016974557  meters moving at  11.840841306515802  m/s.\n",
      "Starting separated by  17.993909506064902  meters moving at  -10.315432752949627  m/s.\n",
      "TOO CLOSE\n",
      "Starting separated by  14.619434465898017  meters moving at  8.623545197651623  m/s.\n",
      "Starting separated by  54.42942525660077  meters moving at  13.387583122484063  m/s.\n",
      "Starting separated by  68.29757208709796  meters moving at  -12.415843687231881  m/s.\n",
      "Starting separated by  65.32057796543944  meters moving at  -15.676267136582274  m/s.\n",
      "Starting separated by  48.58383699403803  meters moving at  6.419384410544566  m/s.\n",
      "Starting separated by  7.849542748894866  meters moving at  4.5138401854218895  m/s.\n",
      "Starting separated by  5.475559388436617  meters moving at  -3.9123580194700103  m/s.\n",
      "TOO CLOSE\n",
      "Starting separated by  43.51574444161436  meters moving at  14.050194767315311  m/s.\n",
      "Starting separated by  63.20159038029243  meters moving at  -17.77714720996855  m/s.\n",
      "Starting separated by  8.378890155834942  meters moving at  1.2695401504443806  m/s.\n",
      "Starting separated by  40.15285539798828  meters moving at  -5.6310240773998  m/s.\n",
      "Starting separated by  61.83061081650304  meters moving at  -1.9313985494216723  m/s.\n",
      "Starting separated by  16.28262462526649  meters moving at  1.7987145539099174  m/s.\n",
      "Starting separated by  57.41364504268934  meters moving at  7.981816174972632  m/s.\n",
      "Starting separated by  71.63744129796349  meters moving at  -2.0696250812305053  m/s.\n",
      "Starting separated by  67.0548546117289  meters moving at  1.6817787241941105  m/s.\n",
      "Starting separated by  12.843486393405696  meters moving at  3.463428132048117  m/s.\n",
      "Starting separated by  30.57973351389418  meters moving at  -3.2246948499441945  m/s.\n",
      "Starting separated by  66.15536351513913  meters moving at  3.138226828668266  m/s.\n",
      "Starting separated by  58.21111267261559  meters moving at  11.168607002277842  m/s.\n",
      "Starting separated by  6.47246073265788  meters moving at  -1.893659236793538  m/s.\n",
      "Starting separated by  27.02222338619483  meters moving at  18.666331033077867  m/s.\n",
      "Starting separated by  58.621047879280624  meters moving at  -9.251169564232683  m/s.\n",
      "Starting separated by  64.13174270320594  meters moving at  12.206593632791964  m/s.\n",
      "Starting separated by  52.578051149078874  meters moving at  -8.526544801058778  m/s.\n",
      "Starting separated by  50.40092402809655  meters moving at  11.731120150702207  m/s.\n",
      "Starting separated by  9.164698686049961  meters moving at  12.589537026282967  m/s.\n",
      "Starting separated by  8.735142472808466  meters moving at  5.912861905919004  m/s.\n",
      "Starting separated by  58.97721911087288  meters moving at  5.486963944285549  m/s.\n",
      "Starting separated by  23.91149571520237  meters moving at  -7.871217728325855  m/s.\n",
      "Starting separated by  47.59887713832618  meters moving at  1.04064365061447  m/s.\n",
      "Starting separated by  63.17696235726943  meters moving at  5.118878981422505  m/s.\n",
      "Starting separated by  42.25517283033849  meters moving at  4.191097535357535  m/s.\n",
      "Starting separated by  47.19609462141708  meters moving at  13.515369475604405  m/s.\n",
      "Starting separated by  19.18820419703734  meters moving at  1.121817911554663  m/s.\n",
      "Starting separated by  58.61790775781117  meters moving at  7.802550522457423  m/s.\n",
      "Starting separated by  6.677677868568301  meters moving at  -1.5777298833391642  m/s.\n",
      "Starting separated by  45.103740661777  meters moving at  7.103811711777375  m/s.\n",
      "Starting separated by  60.27824490231384  meters moving at  -12.914339041874438  m/s.\n",
      "Starting separated by  31.005175014029575  meters moving at  -11.447635968381599  m/s.\n",
      "Starting separated by  62.69997639026  meters moving at  9.063355631988408  m/s.\n",
      "Starting separated by  12.573362811504069  meters moving at  19.7955999423101  m/s.\n",
      "Starting separated by  43.96601896978779  meters moving at  -11.223204259637132  m/s.\n",
      "Starting separated by  29.32552573343109  meters moving at  20.352355709882254  m/s.\n",
      "Starting separated by  5.535790913360435  meters moving at  12.084860097330221  m/s.\n",
      "Starting separated by  46.34625395664072  meters moving at  -4.638760336656841  m/s.\n",
      "Starting separated by  39.94381135509339  meters moving at  6.00573896038998  m/s.\n",
      "Starting separated by  68.4446523907267  meters moving at  -18.885825859450282  m/s.\n",
      "Starting separated by  23.899807518901056  meters moving at  4.285186819999515  m/s.\n",
      "Starting separated by  22.315306493396346  meters moving at  2.0859740173620303  m/s.\n",
      "Starting separated by  60.116937993480796  meters moving at  8.3094753136582  m/s.\n",
      "Starting separated by  20.69666027000398  meters moving at  14.4153610844237  m/s.\n",
      "Starting separated by  57.113968449343936  meters moving at  7.816498986037892  m/s.\n",
      "Starting separated by  11.366294413335467  meters moving at  16.191372000839834  m/s.\n",
      "Starting separated by  44.22613039111172  meters moving at  5.492605087536255  m/s.\n",
      "Starting separated by  72.04745151998928  meters moving at  12.819675690018695  m/s.\n",
      "Starting separated by  38.56983618716883  meters moving at  15.630517286720124  m/s.\n",
      "Starting separated by  47.08004624082927  meters moving at  16.537009573621756  m/s.\n",
      "Starting separated by  71.21133469918192  meters moving at  -13.393305473502188  m/s.\n",
      "Starting separated by  14.119930179714455  meters moving at  -2.8869733500926316  m/s.\n",
      "Starting separated by  70.2235553376695  meters moving at  10.953194028465898  m/s.\n",
      "Starting separated by  21.304365726319627  meters moving at  10.77666545222462  m/s.\n",
      "Starting separated by  74.06781571502943  meters moving at  -0.5275017695848838  m/s.\n",
      "Starting separated by  45.69786151054218  meters moving at  7.188888599261482  m/s.\n",
      "Starting separated by  5.963777071987465  meters moving at  -3.2713810280835656  m/s.\n",
      "Starting separated by  14.103410327711588  meters moving at  7.956387690203973  m/s.\n",
      "Starting separated by  70.15623712909554  meters moving at  8.34737750530967  m/s.\n",
      "Starting separated by  71.40218124690867  meters moving at  -0.05916797667539342  m/s.\n",
      "Starting separated by  8.276840878438062  meters moving at  18.606135029950124  m/s.\n",
      "Starting separated by  42.92972392369375  meters moving at  -13.021943908049808  m/s.\n",
      "Starting separated by  11.376744909979418  meters moving at  21.989940098754467  m/s.\n",
      "Starting separated by  39.72358151597953  meters moving at  -10.486924606180384  m/s.\n",
      "Starting separated by  5.958701300169173  meters moving at  8.997690402855254  m/s.\n",
      "Starting separated by  6.568965231692896  meters moving at  2.9356884500391436  m/s.\n",
      "Starting separated by  19.079066570643825  meters moving at  16.193405026792913  m/s.\n",
      "Starting separated by  67.08749166677887  meters moving at  -13.784806159581493  m/s.\n",
      "Starting separated by  35.34693335783747  meters moving at  3.378064613692823  m/s.\n",
      "Starting separated by  14.37334995045638  meters moving at  8.758317562944475  m/s.\n",
      "Starting separated by  54.151857947904844  meters moving at  -8.431501367558255  m/s.\n",
      "Starting separated by  46.357281680127656  meters moving at  -15.31655697995921  m/s.\n",
      "Starting separated by  61.367133500111464  meters moving at  -10.099352458318494  m/s.\n",
      "Starting separated by  17.23529805035999  meters moving at  -7.2443033502577965  m/s.\n",
      "Starting separated by  57.44512054830893  meters moving at  -8.034796434566635  m/s.\n",
      "Starting separated by  32.483273984495625  meters moving at  -1.2604290079338618  m/s.\n",
      "Starting separated by  56.01425757512647  meters moving at  -13.07613939760072  m/s.\n",
      "Starting separated by  5.877213482544135  meters moving at  3.1281500509714215  m/s.\n",
      "Starting separated by  15.914507221925433  meters moving at  16.017272492708628  m/s.\n",
      "Starting separated by  35.485548745756546  meters moving at  -8.723983672321776  m/s.\n",
      "Starting separated by  16.810008510681396  meters moving at  12.670199713201844  m/s.\n",
      "Starting separated by  27.446004586819026  meters moving at  1.6938079503991492  m/s.\n",
      "Starting separated by  46.421955320736835  meters moving at  15.520806673856733  m/s.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting separated by  62.08051124632904  meters moving at  -14.21703242832725  m/s.\n",
      "Starting separated by  64.63935820763658  meters moving at  -8.80071641403511  m/s.\n",
      "Starting separated by  57.23559909380413  meters moving at  -11.12349884371351  m/s.\n",
      "Starting separated by  50.99556745680128  meters moving at  -13.868486376053468  m/s.\n",
      "mean_reward:398.23 +/- 82.39\n"
     ]
    }
   ],
   "source": [
    "# Random Agent, before training\n",
    "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=100)\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save(\"ppo_acc_410000_steps-improve\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = PPO.load(\"ppo_acc\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "obs = env.reset()\n",
    "for i in range(0,3000):\n",
    "    action, _states = model.predict(obs)\n",
    "    obs, rewards, dones, info = env.step(action)\n",
    "    env.render()\n",
    "    if dones:\n",
    "        env.reset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "nnequiv-tf1",
   "language": "python",
   "name": "nnequiv-tf1"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
