{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gym\n",
    "\n",
    "from stable_baselines3 import PPO\n",
    "from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise\n",
    "from stable_baselines3.common.evaluation import evaluate_policy\n",
    "import time\n",
    "import numpy as np\n",
    "\n",
    "from torch import nn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "import acc2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/steuber/anaconda3/envs/nnequiv-tf1/lib/python3.7/site-packages/gym/logger.py:34: UserWarning: \u001b[33mWARN: Environment '<class 'acc2.ACCEnv2'>' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior.\u001b[0m\n",
      "  warnings.warn(colorize(\"%s: %s\" % (\"WARN\", msg % args), \"yellow\"))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using cuda device\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n"
     ]
    }
   ],
   "source": [
    "# Configuration for the environment and the PPO agent.\n",
    "ENV_ID = \"acc-variant-v3\"  # presumably registered as a side effect of `import acc2` -- confirm\n",
    "SEED = 0  # passed to PPO so network initialisation and action sampling are repeatable\n",
    "HIDDEN_LAYERS = [64, 64, 64, 64]\n",
    "\n",
    "env = gym.make(ENV_ID)\n",
    "\n",
    "# Policy (pi) and value (vf) heads each get their own stack of hidden layers.\n",
    "architecture = [dict(pi=list(HIDDEN_LAYERS), vf=list(HIDDEN_LAYERS))]\n",
    "\n",
    "# NOTE(review): PPO seeds the Python/NumPy/PyTorch generators and forwards the seed to\n",
    "# env.seed(); whether the acc2 environment honours it cannot be seen from this notebook.\n",
    "model = PPO(\n",
    "    \"MlpPolicy\",\n",
    "    env,\n",
    "    verbose=1,\n",
    "    seed=SEED,\n",
    "    policy_kwargs={\"activation_fn\": nn.ReLU, \"net_arch\": architecture},\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Switch the INCLUDE_UNWINNABLE flag off on the raw (unwrapped) environment object.\n",
    "# NOTE(review): presumably this keeps the env from generating unwinnable scenarios -- confirm in acc2.\n",
    "env.unwrapped.INCLUDE_UNWINNABLE = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/steuber/anaconda3/envs/nnequiv-tf1/lib/python3.7/site-packages/stable_baselines3/common/evaluation.py:69: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.\n",
      "  UserWarning,\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:-341.28 +/- 473.83\n"
     ]
    }
   ],
   "source": [
    "# Baseline: randomly initialised agent, before any training.\n",
    "# Evaluate on the env held by the model rather than the bare `env`: PPO already wrapped it\n",
    "# in a Monitor, so evaluate_policy reads the unmodified episode returns from the Monitor\n",
    "# and no longer warns about a missing Monitor wrapper.\n",
    "mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=1000)\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the current value of the flag on the unwrapped env (sanity check before training).\n",
    "env.unwrapped.INCLUDE_UNWINNABLE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 18       |\n",
      "|    ep_rew_mean     | -300     |\n",
      "| time/              |          |\n",
      "|    fps             | 384      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 5        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 20.2        |\n",
      "|    ep_rew_mean          | -260        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 310         |\n",
      "|    iterations           | 2           |\n",
      "|    time_elapsed         | 13          |\n",
      "|    total_timesteps      | 4096        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009322639 |\n",
      "|    clip_fraction        | 0.0618      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.42       |\n",
      "|    explained_variance   | -2.42e-05   |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.8e+03     |\n",
      "|    n_updates            | 10          |\n",
      "|    policy_gradient_loss | -0.00379    |\n",
      "|    std                  | 1.01        |\n",
      "|    value_loss           | 4.24e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 23.2        |\n",
      "|    ep_rew_mean          | -270        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 283         |\n",
      "|    iterations           | 3           |\n",
      "|    time_elapsed         | 21          |\n",
      "|    total_timesteps      | 6144        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009755716 |\n",
      "|    clip_fraction        | 0.0697      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.43       |\n",
      "|    explained_variance   | 0.733       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.36e+04    |\n",
      "|    n_updates            | 20          |\n",
      "|    policy_gradient_loss | -0.00313    |\n",
      "|    std                  | 1.01        |\n",
      "|    value_loss           | 1.98e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 24.4         |\n",
      "|    ep_rew_mean          | -270         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 263          |\n",
      "|    iterations           | 4            |\n",
      "|    time_elapsed         | 31           |\n",
      "|    total_timesteps      | 8192         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0054252045 |\n",
      "|    clip_fraction        | 0.0634       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.42        |\n",
      "|    explained_variance   | 0.678        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.1e+04      |\n",
      "|    n_updates            | 30           |\n",
      "|    policy_gradient_loss | -0.00532     |\n",
      "|    std                  | 1            |\n",
      "|    value_loss           | 2.41e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 20.7         |\n",
      "|    ep_rew_mean          | -360         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 254          |\n",
      "|    iterations           | 5            |\n",
      "|    time_elapsed         | 40           |\n",
      "|    total_timesteps      | 10240        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020354576 |\n",
      "|    clip_fraction        | 0.0828       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.42        |\n",
      "|    explained_variance   | 0.725        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 6.54e+03     |\n",
      "|    n_updates            | 40           |\n",
      "|    policy_gradient_loss | 0.000563     |\n",
      "|    std                  | 0.999        |\n",
      "|    value_loss           | 1.69e+04     |\n",
      "------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# First, short training run. The log below shows rollouts of 2048 steps each,\n",
    "# so the run actually finishes at 10240 timesteps (5 iterations).\n",
    "model = model.learn(total_timesteps=10_000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:-191.35 +/- 392.91\n"
     ]
    }
   ],
   "source": [
    "# Re-evaluate the agent after the training run above.\n",
    "# Use the Monitor-wrapped env held by the model (see PPO's 'Wrapping the env with a\n",
    "# `Monitor` wrapper' message) so evaluate_policy reports the Monitor's episode returns.\n",
    "mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=1000)\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 23.8     |\n",
      "|    ep_rew_mean     | -247     |\n",
      "| time/              |          |\n",
      "|    fps             | 379      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 5        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 22.6        |\n",
      "|    ep_rew_mean          | -190        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 319         |\n",
      "|    iterations           | 2           |\n",
      "|    time_elapsed         | 12          |\n",
      "|    total_timesteps      | 4096        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.013738977 |\n",
      "|    clip_fraction        | 0.0703      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.41       |\n",
      "|    explained_variance   | 0.873       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.79e+03    |\n",
      "|    n_updates            | 60          |\n",
      "|    policy_gradient_loss | -0.00589    |\n",
      "|    std                  | 0.995       |\n",
      "|    value_loss           | 7.04e+03    |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 25.5      |\n",
      "|    ep_rew_mean          | -220      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 301       |\n",
      "|    iterations           | 3         |\n",
      "|    time_elapsed         | 20        |\n",
      "|    total_timesteps      | 6144      |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.0727522 |\n",
      "|    clip_fraction        | 0.201     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.41     |\n",
      "|    explained_variance   | 0.916     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 404       |\n",
      "|    n_updates            | 70        |\n",
      "|    policy_gradient_loss | 0.00495   |\n",
      "|    std                  | 0.988     |\n",
      "|    value_loss           | 3.66e+03  |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 24          |\n",
      "|    ep_rew_mean          | -170        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 282         |\n",
      "|    iterations           | 4           |\n",
      "|    time_elapsed         | 29          |\n",
      "|    total_timesteps      | 8192        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002597937 |\n",
      "|    clip_fraction        | 0.0459      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.41       |\n",
      "|    explained_variance   | 0.825       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 7.71e+03    |\n",
      "|    n_updates            | 80          |\n",
      "|    policy_gradient_loss | -0.00156    |\n",
      "|    std                  | 0.986       |\n",
      "|    value_loss           | 9.84e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 25.1        |\n",
      "|    ep_rew_mean          | -120        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 269         |\n",
      "|    iterations           | 5           |\n",
      "|    time_elapsed         | 37          |\n",
      "|    total_timesteps      | 10240       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011659203 |\n",
      "|    clip_fraction        | 0.117       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.4        |\n",
      "|    explained_variance   | 0.883       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 6.28e+03    |\n",
      "|    n_updates            | 90          |\n",
      "|    policy_gradient_loss | 0.00219     |\n",
      "|    std                  | 0.978       |\n",
      "|    value_loss           | 5e+03       |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 18          |\n",
      "|    ep_rew_mean          | -140        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 266         |\n",
      "|    iterations           | 6           |\n",
      "|    time_elapsed         | 46          |\n",
      "|    total_timesteps      | 12288       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.122682676 |\n",
      "|    clip_fraction        | 0.256       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.39       |\n",
      "|    explained_variance   | 0.873       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 46.1        |\n",
      "|    n_updates            | 100         |\n",
      "|    policy_gradient_loss | 0.0101      |\n",
      "|    std                  | 0.974       |\n",
      "|    value_loss           | 2.88e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 16.5        |\n",
      "|    ep_rew_mean          | -200        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 260         |\n",
      "|    iterations           | 7           |\n",
      "|    time_elapsed         | 55          |\n",
      "|    total_timesteps      | 14336       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.019611528 |\n",
      "|    clip_fraction        | 0.0871      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.39       |\n",
      "|    explained_variance   | 0.9         |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.39e+03    |\n",
      "|    n_updates            | 110         |\n",
      "|    policy_gradient_loss | 0.00404     |\n",
      "|    std                  | 0.971       |\n",
      "|    value_loss           | 4.58e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 15.6         |\n",
      "|    ep_rew_mean          | -220         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 262          |\n",
      "|    iterations           | 8            |\n",
      "|    time_elapsed         | 62           |\n",
      "|    total_timesteps      | 16384        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0051340526 |\n",
      "|    clip_fraction        | 0.0735       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.39        |\n",
      "|    explained_variance   | 0.904        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.67e+03     |\n",
      "|    n_updates            | 120          |\n",
      "|    policy_gradient_loss | -0.00175     |\n",
      "|    std                  | 0.973        |\n",
      "|    value_loss           | 6.51e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 14.8         |\n",
      "|    ep_rew_mean          | -110         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 261          |\n",
      "|    iterations           | 9            |\n",
      "|    time_elapsed         | 70           |\n",
      "|    total_timesteps      | 18432        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0027780328 |\n",
      "|    clip_fraction        | 0.0496       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.39        |\n",
      "|    explained_variance   | 0.947        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 289          |\n",
      "|    n_updates            | 130          |\n",
      "|    policy_gradient_loss | -0.00279     |\n",
      "|    std                  | 0.975        |\n",
      "|    value_loss           | 3.46e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.1         |\n",
      "|    ep_rew_mean          | -160         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 258          |\n",
      "|    iterations           | 10           |\n",
      "|    time_elapsed         | 79           |\n",
      "|    total_timesteps      | 20480        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0032841684 |\n",
      "|    clip_fraction        | 0.129        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.4         |\n",
      "|    explained_variance   | 0.928        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 8.25e+03     |\n",
      "|    n_updates            | 140          |\n",
      "|    policy_gradient_loss | 0.00411      |\n",
      "|    std                  | 0.983        |\n",
      "|    value_loss           | 3.61e+03     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 16.3        |\n",
      "|    ep_rew_mean          | -190        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 255         |\n",
      "|    iterations           | 11          |\n",
      "|    time_elapsed         | 88          |\n",
      "|    total_timesteps      | 22528       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001673599 |\n",
      "|    clip_fraction        | 0.0412      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.4        |\n",
      "|    explained_variance   | 0.899       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.09e+03    |\n",
      "|    n_updates            | 150         |\n",
      "|    policy_gradient_loss | 0.000419    |\n",
      "|    std                  | 0.977       |\n",
      "|    value_loss           | 5.79e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 15.7         |\n",
      "|    ep_rew_mean          | -190         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 252          |\n",
      "|    iterations           | 12           |\n",
      "|    time_elapsed         | 97           |\n",
      "|    total_timesteps      | 24576        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0065887696 |\n",
      "|    clip_fraction        | 0.0378       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.39        |\n",
      "|    explained_variance   | 0.906        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.12e+03     |\n",
      "|    n_updates            | 160          |\n",
      "|    policy_gradient_loss | 0.00296      |\n",
      "|    std                  | 0.971        |\n",
      "|    value_loss           | 7.01e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.3         |\n",
      "|    ep_rew_mean          | -160         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 251          |\n",
      "|    iterations           | 13           |\n",
      "|    time_elapsed         | 105          |\n",
      "|    total_timesteps      | 26624        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0028801984 |\n",
      "|    clip_fraction        | 0.0566       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.39        |\n",
      "|    explained_variance   | 0.939        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.83e+03     |\n",
      "|    n_updates            | 170          |\n",
      "|    policy_gradient_loss | -0.000476    |\n",
      "|    std                  | 0.972        |\n",
      "|    value_loss           | 4.2e+03      |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 16.2        |\n",
      "|    ep_rew_mean          | -160        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 251         |\n",
      "|    iterations           | 14          |\n",
      "|    time_elapsed         | 114         |\n",
      "|    total_timesteps      | 28672       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002102976 |\n",
      "|    clip_fraction        | 0.0412      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.39       |\n",
      "|    explained_variance   | 0.86        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.35e+03    |\n",
      "|    n_updates            | 180         |\n",
      "|    policy_gradient_loss | -0.000297   |\n",
      "|    std                  | 0.971       |\n",
      "|    value_loss           | 1.08e+04    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 16         |\n",
      "|    ep_rew_mean          | -170       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 249        |\n",
      "|    iterations           | 15         |\n",
      "|    time_elapsed         | 123        |\n",
      "|    total_timesteps      | 30720      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01034803 |\n",
      "|    clip_fraction        | 0.0447     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.39      |\n",
      "|    explained_variance   | 0.85       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 5.67e+03   |\n",
      "|    n_updates            | 190        |\n",
      "|    policy_gradient_loss | 0.000598   |\n",
      "|    std                  | 0.969      |\n",
      "|    value_loss           | 9.22e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 15.4        |\n",
      "|    ep_rew_mean          | -160        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 245         |\n",
      "|    iterations           | 16          |\n",
      "|    time_elapsed         | 133         |\n",
      "|    total_timesteps      | 32768       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005510375 |\n",
      "|    clip_fraction        | 0.068       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.39       |\n",
      "|    explained_variance   | 0.868       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.89e+03    |\n",
      "|    n_updates            | 200         |\n",
      "|    policy_gradient_loss | -0.000467   |\n",
      "|    std                  | 0.979       |\n",
      "|    value_loss           | 9.37e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 15.7         |\n",
      "|    ep_rew_mean          | -150         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 249          |\n",
      "|    iterations           | 17           |\n",
      "|    time_elapsed         | 139          |\n",
      "|    total_timesteps      | 34816        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0060754484 |\n",
      "|    clip_fraction        | 0.0715       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.39        |\n",
      "|    explained_variance   | 0.836        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.91e+03     |\n",
      "|    n_updates            | 210          |\n",
      "|    policy_gradient_loss | -0.00274     |\n",
      "|    std                  | 0.973        |\n",
      "|    value_loss           | 9.47e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 18.4        |\n",
      "|    ep_rew_mean          | -60.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 253         |\n",
      "|    iterations           | 18          |\n",
      "|    time_elapsed         | 145         |\n",
      "|    total_timesteps      | 36864       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002080983 |\n",
      "|    clip_fraction        | 0.0643      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.39       |\n",
      "|    explained_variance   | 0.918       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2e+03       |\n",
      "|    n_updates            | 220         |\n",
      "|    policy_gradient_loss | 0.00225     |\n",
      "|    std                  | 0.969       |\n",
      "|    value_loss           | 5.9e+03     |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 16.2        |\n",
      "|    ep_rew_mean          | -110        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 255         |\n",
      "|    iterations           | 19          |\n",
      "|    time_elapsed         | 152         |\n",
      "|    total_timesteps      | 38912       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007252603 |\n",
      "|    clip_fraction        | 0.162       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.39       |\n",
      "|    explained_variance   | 0.64        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 6.27e+03    |\n",
      "|    n_updates            | 230         |\n",
      "|    policy_gradient_loss | 0.00405     |\n",
      "|    std                  | 0.972       |\n",
      "|    value_loss           | 7.07e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.4         |\n",
      "|    ep_rew_mean          | -130         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 258          |\n",
      "|    iterations           | 20           |\n",
      "|    time_elapsed         | 158          |\n",
      "|    total_timesteps      | 40960        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0020881684 |\n",
      "|    clip_fraction        | 0.103        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.39        |\n",
      "|    explained_variance   | 0.83         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 849          |\n",
      "|    n_updates            | 240          |\n",
      "|    policy_gradient_loss | 0.00663      |\n",
      "|    std                  | 0.969        |\n",
      "|    value_loss           | 7.18e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.3         |\n",
      "|    ep_rew_mean          | -120         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 259          |\n",
      "|    iterations           | 21           |\n",
      "|    time_elapsed         | 165          |\n",
      "|    total_timesteps      | 43008        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0016806822 |\n",
      "|    clip_fraction        | 0.0541       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.39        |\n",
      "|    explained_variance   | 0.913        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.45e+03     |\n",
      "|    n_updates            | 250          |\n",
      "|    policy_gradient_loss | 0.00105      |\n",
      "|    std                  | 0.967        |\n",
      "|    value_loss           | 4.5e+03      |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 17.4       |\n",
      "|    ep_rew_mean          | -110       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 261        |\n",
      "|    iterations           | 22         |\n",
      "|    time_elapsed         | 172        |\n",
      "|    total_timesteps      | 45056      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.04387749 |\n",
      "|    clip_fraction        | 0.257      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.38      |\n",
      "|    explained_variance   | 0.886      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 321        |\n",
      "|    n_updates            | 260        |\n",
      "|    policy_gradient_loss | 0.0205     |\n",
      "|    std                  | 0.967      |\n",
      "|    value_loss           | 3.43e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 18.2        |\n",
      "|    ep_rew_mean          | -50.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 258         |\n",
      "|    iterations           | 23          |\n",
      "|    time_elapsed         | 182         |\n",
      "|    total_timesteps      | 47104       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004282859 |\n",
      "|    clip_fraction        | 0.0225      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.38       |\n",
      "|    explained_variance   | 0.731       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 454         |\n",
      "|    n_updates            | 270         |\n",
      "|    policy_gradient_loss | 0.000121    |\n",
      "|    std                  | 0.966       |\n",
      "|    value_loss           | 7.46e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 17.2        |\n",
      "|    ep_rew_mean          | -60.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 257         |\n",
      "|    iterations           | 24          |\n",
      "|    time_elapsed         | 190         |\n",
      "|    total_timesteps      | 49152       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002742874 |\n",
      "|    clip_fraction        | 0.181       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.39       |\n",
      "|    explained_variance   | 0.706       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.02e+04    |\n",
      "|    n_updates            | 280         |\n",
      "|    policy_gradient_loss | 0.0138      |\n",
      "|    std                  | 0.964       |\n",
      "|    value_loss           | 8.32e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 16.7        |\n",
      "|    ep_rew_mean          | -180        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 256         |\n",
      "|    iterations           | 25          |\n",
      "|    time_elapsed         | 199         |\n",
      "|    total_timesteps      | 51200       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.016336221 |\n",
      "|    clip_fraction        | 0.108       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.38       |\n",
      "|    explained_variance   | 0.786       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.19e+03    |\n",
      "|    n_updates            | 290         |\n",
      "|    policy_gradient_loss | 0.000991    |\n",
      "|    std                  | 0.958       |\n",
      "|    value_loss           | 5.93e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.4         |\n",
      "|    ep_rew_mean          | -110         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 255          |\n",
      "|    iterations           | 26           |\n",
      "|    time_elapsed         | 208          |\n",
      "|    total_timesteps      | 53248        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0041199718 |\n",
      "|    clip_fraction        | 0.0256       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.38        |\n",
      "|    explained_variance   | 0.767        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.44e+03     |\n",
      "|    n_updates            | 300          |\n",
      "|    policy_gradient_loss | -0.000378    |\n",
      "|    std                  | 0.957        |\n",
      "|    value_loss           | 1.48e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 17.6        |\n",
      "|    ep_rew_mean          | -180        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 254         |\n",
      "|    iterations           | 27          |\n",
      "|    time_elapsed         | 217         |\n",
      "|    total_timesteps      | 55296       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.012632364 |\n",
      "|    clip_fraction        | 0.0902      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.38       |\n",
      "|    explained_variance   | 0.903       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 539         |\n",
      "|    n_updates            | 310         |\n",
      "|    policy_gradient_loss | 0.00166     |\n",
      "|    std                  | 0.962       |\n",
      "|    value_loss           | 4.31e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 16.2        |\n",
      "|    ep_rew_mean          | -110        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 253         |\n",
      "|    iterations           | 28          |\n",
      "|    time_elapsed         | 226         |\n",
      "|    total_timesteps      | 57344       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011165957 |\n",
      "|    clip_fraction        | 0.0458      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.38       |\n",
      "|    explained_variance   | 0.841       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 6.11e+03    |\n",
      "|    n_updates            | 320         |\n",
      "|    policy_gradient_loss | -0.00018    |\n",
      "|    std                  | 0.961       |\n",
      "|    value_loss           | 1.09e+04    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 19         |\n",
      "|    ep_rew_mean          | -140       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 252        |\n",
      "|    iterations           | 29         |\n",
      "|    time_elapsed         | 235        |\n",
      "|    total_timesteps      | 59392      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02328099 |\n",
      "|    clip_fraction        | 0.472      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.38      |\n",
      "|    explained_variance   | 0.947      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.33e+03   |\n",
      "|    n_updates            | 330        |\n",
      "|    policy_gradient_loss | 0.0258     |\n",
      "|    std                  | 0.972      |\n",
      "|    value_loss           | 1.63e+03   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 18.4         |\n",
      "|    ep_rew_mean          | -90.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 251          |\n",
      "|    iterations           | 30           |\n",
      "|    time_elapsed         | 243          |\n",
      "|    total_timesteps      | 61440        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0028975422 |\n",
      "|    clip_fraction        | 0.0474       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.39        |\n",
      "|    explained_variance   | 0.714        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 6.98e+03     |\n",
      "|    n_updates            | 340          |\n",
      "|    policy_gradient_loss | 0.000186     |\n",
      "|    std                  | 0.971        |\n",
      "|    value_loss           | 1.17e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 19.8        |\n",
      "|    ep_rew_mean          | -50.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 250         |\n",
      "|    iterations           | 31          |\n",
      "|    time_elapsed         | 252         |\n",
      "|    total_timesteps      | 63488       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001728432 |\n",
      "|    clip_fraction        | 0.0133      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.39       |\n",
      "|    explained_variance   | 0.726       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.59e+03    |\n",
      "|    n_updates            | 350         |\n",
      "|    policy_gradient_loss | 0.00159     |\n",
      "|    std                  | 0.972       |\n",
      "|    value_loss           | 9.97e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 18.3         |\n",
      "|    ep_rew_mean          | -160         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 249          |\n",
      "|    iterations           | 32           |\n",
      "|    time_elapsed         | 262          |\n",
      "|    total_timesteps      | 65536        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0030261255 |\n",
      "|    clip_fraction        | 0.0235       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.39        |\n",
      "|    explained_variance   | 0.63         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.05e+03     |\n",
      "|    n_updates            | 360          |\n",
      "|    policy_gradient_loss | 0.00287      |\n",
      "|    std                  | 0.966        |\n",
      "|    value_loss           | 7.68e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 19.5         |\n",
      "|    ep_rew_mean          | -110         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 248          |\n",
      "|    iterations           | 33           |\n",
      "|    time_elapsed         | 271          |\n",
      "|    total_timesteps      | 67584        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0024796517 |\n",
      "|    clip_fraction        | 0.0213       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.38        |\n",
      "|    explained_variance   | 0.793        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.08e+03     |\n",
      "|    n_updates            | 370          |\n",
      "|    policy_gradient_loss | 0.000739     |\n",
      "|    std                  | 0.966        |\n",
      "|    value_loss           | 9.98e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.9         |\n",
      "|    ep_rew_mean          | -100         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 248          |\n",
      "|    iterations           | 34           |\n",
      "|    time_elapsed         | 280          |\n",
      "|    total_timesteps      | 69632        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004603427 |\n",
      "|    clip_fraction        | 0.0121       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.39        |\n",
      "|    explained_variance   | 0.706        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 6.32e+03     |\n",
      "|    n_updates            | 380          |\n",
      "|    policy_gradient_loss | 0.00018      |\n",
      "|    std                  | 0.967        |\n",
      "|    value_loss           | 1.09e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 18.9         |\n",
      "|    ep_rew_mean          | -40.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 248          |\n",
      "|    iterations           | 35           |\n",
      "|    time_elapsed         | 288          |\n",
      "|    total_timesteps      | 71680        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007072571 |\n",
      "|    clip_fraction        | 0.0112       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.38        |\n",
      "|    explained_variance   | 0.691        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.09e+03     |\n",
      "|    n_updates            | 390          |\n",
      "|    policy_gradient_loss | -0.000551    |\n",
      "|    std                  | 0.964        |\n",
      "|    value_loss           | 1.14e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 18.8        |\n",
      "|    ep_rew_mean          | -80.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 247         |\n",
      "|    iterations           | 36          |\n",
      "|    time_elapsed         | 298         |\n",
      "|    total_timesteps      | 73728       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005425937 |\n",
      "|    clip_fraction        | 0.257       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.38       |\n",
      "|    explained_variance   | 0.595       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.88e+03    |\n",
      "|    n_updates            | 400         |\n",
      "|    policy_gradient_loss | 0.0137      |\n",
      "|    std                  | 0.957       |\n",
      "|    value_loss           | 5.94e+03    |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 17.9          |\n",
      "|    ep_rew_mean          | -60.4         |\n",
      "| time/                   |               |\n",
      "|    fps                  | 246           |\n",
      "|    iterations           | 37            |\n",
      "|    time_elapsed         | 306           |\n",
      "|    total_timesteps      | 75776         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00037917262 |\n",
      "|    clip_fraction        | 0.0042        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.37         |\n",
      "|    explained_variance   | 0.524         |\n",
      "|    learning_rate        | 0.0003        |\n",
      "|    loss                 | 1.02e+04      |\n",
      "|    n_updates            | 410           |\n",
      "|    policy_gradient_loss | 3.99e-05      |\n",
      "|    std                  | 0.956         |\n",
      "|    value_loss           | 1.25e+04      |\n",
      "-------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 17.7        |\n",
      "|    ep_rew_mean          | -130        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 246         |\n",
      "|    iterations           | 38          |\n",
      "|    time_elapsed         | 315         |\n",
      "|    total_timesteps      | 77824       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.060700238 |\n",
      "|    clip_fraction        | 0.108       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.37       |\n",
      "|    explained_variance   | 0.778       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.29e+03    |\n",
      "|    n_updates            | 420         |\n",
      "|    policy_gradient_loss | 0.00216     |\n",
      "|    std                  | 0.96        |\n",
      "|    value_loss           | 5.54e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 16.5        |\n",
      "|    ep_rew_mean          | -180        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 245         |\n",
      "|    iterations           | 39          |\n",
      "|    time_elapsed         | 324         |\n",
      "|    total_timesteps      | 79872       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002442908 |\n",
      "|    clip_fraction        | 0.0649      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.38       |\n",
      "|    explained_variance   | 0.781       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.22e+03    |\n",
      "|    n_updates            | 430         |\n",
      "|    policy_gradient_loss | 0.000641    |\n",
      "|    std                  | 0.96        |\n",
      "|    value_loss           | 1.13e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.4         |\n",
      "|    ep_rew_mean          | -160         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 245          |\n",
      "|    iterations           | 40           |\n",
      "|    time_elapsed         | 333          |\n",
      "|    total_timesteps      | 81920        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007720437 |\n",
      "|    clip_fraction        | 0.0117       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.38        |\n",
      "|    explained_variance   | 0.762        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.88e+03     |\n",
      "|    n_updates            | 440          |\n",
      "|    policy_gradient_loss | 0.000178     |\n",
      "|    std                  | 0.96         |\n",
      "|    value_loss           | 1.67e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.5         |\n",
      "|    ep_rew_mean          | -70.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 245          |\n",
      "|    iterations           | 41           |\n",
      "|    time_elapsed         | 342          |\n",
      "|    total_timesteps      | 83968        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0010256863 |\n",
      "|    clip_fraction        | 0.0149       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.38        |\n",
      "|    explained_variance   | 0.764        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 8.23e+03     |\n",
      "|    n_updates            | 450          |\n",
      "|    policy_gradient_loss | -0.00311     |\n",
      "|    std                  | 0.958        |\n",
      "|    value_loss           | 1.4e+04      |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16           |\n",
      "|    ep_rew_mean          | -100         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 244          |\n",
      "|    iterations           | 42           |\n",
      "|    time_elapsed         | 351          |\n",
      "|    total_timesteps      | 86016        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013303936 |\n",
      "|    clip_fraction        | 0.0193       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.37        |\n",
      "|    explained_variance   | 0.723        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 580          |\n",
      "|    n_updates            | 460          |\n",
      "|    policy_gradient_loss | -7.29e-05    |\n",
      "|    std                  | 0.955        |\n",
      "|    value_loss           | 8.3e+03      |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.1         |\n",
      "|    ep_rew_mean          | -60.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 245          |\n",
      "|    iterations           | 43           |\n",
      "|    time_elapsed         | 359          |\n",
      "|    total_timesteps      | 88064        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0047836704 |\n",
      "|    clip_fraction        | 0.139        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.37        |\n",
      "|    explained_variance   | 0.854        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 502          |\n",
      "|    n_updates            | 470          |\n",
      "|    policy_gradient_loss | 0.00852      |\n",
      "|    std                  | 0.954        |\n",
      "|    value_loss           | 4.61e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 17.9        |\n",
      "|    ep_rew_mean          | -70.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 244         |\n",
      "|    iterations           | 44          |\n",
      "|    time_elapsed         | 367         |\n",
      "|    total_timesteps      | 90112       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.016913999 |\n",
      "|    clip_fraction        | 0.133       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.37       |\n",
      "|    explained_variance   | 0.608       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.62e+03    |\n",
      "|    n_updates            | 480         |\n",
      "|    policy_gradient_loss | 0.00737     |\n",
      "|    std                  | 0.955       |\n",
      "|    value_loss           | 5.87e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 14.9         |\n",
      "|    ep_rew_mean          | -80.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 244          |\n",
      "|    iterations           | 45           |\n",
      "|    time_elapsed         | 376          |\n",
      "|    total_timesteps      | 92160        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0037281476 |\n",
      "|    clip_fraction        | 0.0694       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.37        |\n",
      "|    explained_variance   | 0.692        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.62e+03     |\n",
      "|    n_updates            | 490          |\n",
      "|    policy_gradient_loss | 0.00247      |\n",
      "|    std                  | 0.955        |\n",
      "|    value_loss           | 9e+03        |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 18.8         |\n",
      "|    ep_rew_mean          | -70.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 244          |\n",
      "|    iterations           | 46           |\n",
      "|    time_elapsed         | 385          |\n",
      "|    total_timesteps      | 94208        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0011640507 |\n",
      "|    clip_fraction        | 0.0224       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.37        |\n",
      "|    explained_variance   | 0.757        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.45e+03     |\n",
      "|    n_updates            | 500          |\n",
      "|    policy_gradient_loss | 0.0017       |\n",
      "|    std                  | 0.953        |\n",
      "|    value_loss           | 9.53e+03     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 15.9         |\n",
      "|    ep_rew_mean          | -100         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 244          |\n",
      "|    iterations           | 47           |\n",
      "|    time_elapsed         | 394          |\n",
      "|    total_timesteps      | 96256        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013490139 |\n",
      "|    clip_fraction        | 0.0161       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.37        |\n",
      "|    explained_variance   | 0.653        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.68e+03     |\n",
      "|    n_updates            | 510          |\n",
      "|    policy_gradient_loss | -0.00181     |\n",
      "|    std                  | 0.951        |\n",
      "|    value_loss           | 1.07e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 20.5        |\n",
      "|    ep_rew_mean          | -60.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 243         |\n",
      "|    iterations           | 48          |\n",
      "|    time_elapsed         | 403         |\n",
      "|    total_timesteps      | 98304       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003534777 |\n",
      "|    clip_fraction        | 0.0246      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.37       |\n",
      "|    explained_variance   | 0.595       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.43e+04    |\n",
      "|    n_updates            | 520         |\n",
      "|    policy_gradient_loss | -0.000304   |\n",
      "|    std                  | 0.949       |\n",
      "|    value_loss           | 1.76e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 18          |\n",
      "|    ep_rew_mean          | -120        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 243         |\n",
      "|    iterations           | 49          |\n",
      "|    time_elapsed         | 411         |\n",
      "|    total_timesteps      | 100352      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.043872565 |\n",
      "|    clip_fraction        | 0.0943      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.37       |\n",
      "|    explained_variance   | 0.399       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.22e+04    |\n",
      "|    n_updates            | 530         |\n",
      "|    policy_gradient_loss | 0.00274     |\n",
      "|    std                  | 0.949       |\n",
      "|    value_loss           | 1.08e+04    |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.6      |\n",
      "|    ep_rew_mean          | -70.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 243       |\n",
      "|    iterations           | 50        |\n",
      "|    time_elapsed         | 421       |\n",
      "|    total_timesteps      | 102400    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.5982332 |\n",
      "|    clip_fraction        | 0.215     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.37     |\n",
      "|    explained_variance   | 0.673     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.53e+03  |\n",
      "|    n_updates            | 540       |\n",
      "|    policy_gradient_loss | 0.0219    |\n",
      "|    std                  | 0.947     |\n",
      "|    value_loss           | 1.1e+04   |\n",
      "---------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.3         |\n",
      "|    ep_rew_mean          | -110         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 243          |\n",
      "|    iterations           | 51           |\n",
      "|    time_elapsed         | 429          |\n",
      "|    total_timesteps      | 104448       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012476849 |\n",
      "|    clip_fraction        | 0.0111       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.36        |\n",
      "|    explained_variance   | 0.607        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.66e+03     |\n",
      "|    n_updates            | 550          |\n",
      "|    policy_gradient_loss | -0.00219     |\n",
      "|    std                  | 0.944        |\n",
      "|    value_loss           | 1.2e+04      |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.3         |\n",
      "|    ep_rew_mean          | -60.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 242          |\n",
      "|    iterations           | 52           |\n",
      "|    time_elapsed         | 438          |\n",
      "|    total_timesteps      | 106496       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008232849 |\n",
      "|    clip_fraction        | 0.00864      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.36        |\n",
      "|    explained_variance   | 0.601        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.84e+03     |\n",
      "|    n_updates            | 560          |\n",
      "|    policy_gradient_loss | -0.000452    |\n",
      "|    std                  | 0.94         |\n",
      "|    value_loss           | 1.62e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.4         |\n",
      "|    ep_rew_mean          | -60.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 242          |\n",
      "|    iterations           | 53           |\n",
      "|    time_elapsed         | 447          |\n",
      "|    total_timesteps      | 108544       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014429846 |\n",
      "|    clip_fraction        | 0.0169       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.35        |\n",
      "|    explained_variance   | 0.591        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7.86e+03     |\n",
      "|    n_updates            | 570          |\n",
      "|    policy_gradient_loss | -0.000355    |\n",
      "|    std                  | 0.937        |\n",
      "|    value_loss           | 1.34e+04     |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 17.2          |\n",
      "|    ep_rew_mean          | -80.4         |\n",
      "| time/                   |               |\n",
      "|    fps                  | 242           |\n",
      "|    iterations           | 54            |\n",
      "|    time_elapsed         | 456           |\n",
      "|    total_timesteps      | 110592        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00062477845 |\n",
      "|    clip_fraction        | 0.0452        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.35         |\n",
      "|    explained_variance   | 0.58          |\n",
      "|    learning_rate        | 0.0003        |\n",
      "|    loss                 | 5.13e+03      |\n",
      "|    n_updates            | 580           |\n",
      "|    policy_gradient_loss | 0.00116       |\n",
      "|    std                  | 0.937         |\n",
      "|    value_loss           | 1.33e+04      |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16           |\n",
      "|    ep_rew_mean          | -60.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 241          |\n",
      "|    iterations           | 55           |\n",
      "|    time_elapsed         | 465          |\n",
      "|    total_timesteps      | 112640       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014286976 |\n",
      "|    clip_fraction        | 0.0374       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.35        |\n",
      "|    explained_variance   | 0.546        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7.9e+03      |\n",
      "|    n_updates            | 590          |\n",
      "|    policy_gradient_loss | -2.08e-05    |\n",
      "|    std                  | 0.936        |\n",
      "|    value_loss           | 1.46e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 16.3     |\n",
      "|    ep_rew_mean          | -140     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 241      |\n",
      "|    iterations           | 56       |\n",
      "|    time_elapsed         | 474      |\n",
      "|    total_timesteps      | 114688   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 8.571417 |\n",
      "|    clip_fraction        | 0.72     |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.36    |\n",
      "|    explained_variance   | 0.604    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 1.29e+03 |\n",
      "|    n_updates            | 600      |\n",
      "|    policy_gradient_loss | 0.0576   |\n",
      "|    std                  | 0.938    |\n",
      "|    value_loss           | 8.07e+03 |\n",
      "--------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 16.9       |\n",
      "|    ep_rew_mean          | -70.4      |\n",
      "| time/                   |            |\n",
      "|    fps                  | 241        |\n",
      "|    iterations           | 57         |\n",
      "|    time_elapsed         | 483        |\n",
      "|    total_timesteps      | 116736     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.03340872 |\n",
      "|    clip_fraction        | 0.0604     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.35      |\n",
      "|    explained_variance   | 0.701      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 714        |\n",
      "|    n_updates            | 610        |\n",
      "|    policy_gradient_loss | 0.000462   |\n",
      "|    std                  | 0.935      |\n",
      "|    value_loss           | 1.28e+04   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 15.4         |\n",
      "|    ep_rew_mean          | -100         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 242          |\n",
      "|    iterations           | 58           |\n",
      "|    time_elapsed         | 490          |\n",
      "|    total_timesteps      | 118784       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0068404805 |\n",
      "|    clip_fraction        | 0.0403       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.35        |\n",
      "|    explained_variance   | 0.381        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.7e+03      |\n",
      "|    n_updates            | 620          |\n",
      "|    policy_gradient_loss | -0.00043     |\n",
      "|    std                  | 0.93         |\n",
      "|    value_loss           | 1.28e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 15.9         |\n",
      "|    ep_rew_mean          | -60.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 243          |\n",
      "|    iterations           | 59           |\n",
      "|    time_elapsed         | 496          |\n",
      "|    total_timesteps      | 120832       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0022554058 |\n",
      "|    clip_fraction        | 0.0265       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.35        |\n",
      "|    explained_variance   | 0.668        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.58e+03     |\n",
      "|    n_updates            | 630          |\n",
      "|    policy_gradient_loss | 0.000401     |\n",
      "|    std                  | 0.929        |\n",
      "|    value_loss           | 7.68e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 15.3        |\n",
      "|    ep_rew_mean          | -80.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 244         |\n",
      "|    iterations           | 60          |\n",
      "|    time_elapsed         | 502         |\n",
      "|    total_timesteps      | 122880      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001602391 |\n",
      "|    clip_fraction        | 0.0153      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.34       |\n",
      "|    explained_variance   | 0.674       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.03e+04    |\n",
      "|    n_updates            | 640         |\n",
      "|    policy_gradient_loss | -0.00149    |\n",
      "|    std                  | 0.928       |\n",
      "|    value_loss           | 1.05e+04    |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.2      |\n",
      "|    ep_rew_mean          | -50.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 245       |\n",
      "|    iterations           | 61        |\n",
      "|    time_elapsed         | 508       |\n",
      "|    total_timesteps      | 124928    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 14.591528 |\n",
      "|    clip_fraction        | 0.781     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.34     |\n",
      "|    explained_variance   | 0.694     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.91e+03  |\n",
      "|    n_updates            | 650       |\n",
      "|    policy_gradient_loss | 0.0909    |\n",
      "|    std                  | 0.932     |\n",
      "|    value_loss           | 8.15e+03  |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 18.3        |\n",
      "|    ep_rew_mean          | -70.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 246         |\n",
      "|    iterations           | 62          |\n",
      "|    time_elapsed         | 514         |\n",
      "|    total_timesteps      | 126976      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001664391 |\n",
      "|    clip_fraction        | 0.0119      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.35       |\n",
      "|    explained_variance   | 0.504       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 9.7e+03     |\n",
      "|    n_updates            | 660         |\n",
      "|    policy_gradient_loss | -0.0023     |\n",
      "|    std                  | 0.931       |\n",
      "|    value_loss           | 1.14e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.1         |\n",
      "|    ep_rew_mean          | -70.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 247          |\n",
      "|    iterations           | 63           |\n",
      "|    time_elapsed         | 520          |\n",
      "|    total_timesteps      | 129024       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0017987187 |\n",
      "|    clip_fraction        | 0.0185       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.35        |\n",
      "|    explained_variance   | 0.465        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7.91e+03     |\n",
      "|    n_updates            | 670          |\n",
      "|    policy_gradient_loss | -0.00333     |\n",
      "|    std                  | 0.928        |\n",
      "|    value_loss           | 1.52e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.6         |\n",
      "|    ep_rew_mean          | -70.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 248          |\n",
      "|    iterations           | 64           |\n",
      "|    time_elapsed         | 526          |\n",
      "|    total_timesteps      | 131072       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0046572443 |\n",
      "|    clip_fraction        | 0.0443       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.34        |\n",
      "|    explained_variance   | 0.536        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.09e+04     |\n",
      "|    n_updates            | 680          |\n",
      "|    policy_gradient_loss | -0.000274    |\n",
      "|    std                  | 0.926        |\n",
      "|    value_loss           | 1.33e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.6      |\n",
      "|    ep_rew_mean          | -90.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 250       |\n",
      "|    iterations           | 65        |\n",
      "|    time_elapsed         | 532       |\n",
      "|    total_timesteps      | 133120    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.6831351 |\n",
      "|    clip_fraction        | 0.396     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.34     |\n",
      "|    explained_variance   | 0.607     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 4.41e+03  |\n",
      "|    n_updates            | 690       |\n",
      "|    policy_gradient_loss | 0.0423    |\n",
      "|    std                  | 0.927     |\n",
      "|    value_loss           | 7.89e+03  |\n",
      "---------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 18            |\n",
      "|    ep_rew_mean          | -50.4         |\n",
      "| time/                   |               |\n",
      "|    fps                  | 250           |\n",
      "|    iterations           | 66            |\n",
      "|    time_elapsed         | 538           |\n",
      "|    total_timesteps      | 135168        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00081852626 |\n",
      "|    clip_fraction        | 0.00522       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.34         |\n",
      "|    explained_variance   | 0.672         |\n",
      "|    learning_rate        | 0.0003        |\n",
      "|    loss                 | 1.08e+03      |\n",
      "|    n_updates            | 700           |\n",
      "|    policy_gradient_loss | -0.00157      |\n",
      "|    std                  | 0.928         |\n",
      "|    value_loss           | 9.55e+03      |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 15.6        |\n",
      "|    ep_rew_mean          | -100        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 251         |\n",
      "|    iterations           | 67          |\n",
      "|    time_elapsed         | 545         |\n",
      "|    total_timesteps      | 137216      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.075610265 |\n",
      "|    clip_fraction        | 0.0571      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.34       |\n",
      "|    explained_variance   | 0.394       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 7.02e+03    |\n",
      "|    n_updates            | 710         |\n",
      "|    policy_gradient_loss | 0.00129     |\n",
      "|    std                  | 0.923       |\n",
      "|    value_loss           | 9.82e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.3         |\n",
      "|    ep_rew_mean          | -30.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 252          |\n",
      "|    iterations           | 68           |\n",
      "|    time_elapsed         | 551          |\n",
      "|    total_timesteps      | 139264       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0026662173 |\n",
      "|    clip_fraction        | 0.15         |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.34        |\n",
      "|    explained_variance   | 0.595        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.03e+04     |\n",
      "|    n_updates            | 720          |\n",
      "|    policy_gradient_loss | 0.0108       |\n",
      "|    std                  | 0.922        |\n",
      "|    value_loss           | 1.65e+04     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 14.8       |\n",
      "|    ep_rew_mean          | -210       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 253        |\n",
      "|    iterations           | 69         |\n",
      "|    time_elapsed         | 557        |\n",
      "|    total_timesteps      | 141312     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.26272345 |\n",
      "|    clip_fraction        | 0.305      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.34      |\n",
      "|    explained_variance   | 0.656      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 4.9e+03    |\n",
      "|    n_updates            | 730        |\n",
      "|    policy_gradient_loss | 0.0334     |\n",
      "|    std                  | 0.92       |\n",
      "|    value_loss           | 6.68e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 15.9        |\n",
      "|    ep_rew_mean          | -250        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 254         |\n",
      "|    iterations           | 70          |\n",
      "|    time_elapsed         | 563         |\n",
      "|    total_timesteps      | 143360      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005212251 |\n",
      "|    clip_fraction        | 0.0437      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.34       |\n",
      "|    explained_variance   | 0.695       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.98e+03    |\n",
      "|    n_updates            | 740         |\n",
      "|    policy_gradient_loss | -0.00742    |\n",
      "|    std                  | 0.921       |\n",
      "|    value_loss           | 1.6e+04     |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.9         |\n",
      "|    ep_rew_mean          | -90.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 255          |\n",
      "|    iterations           | 71           |\n",
      "|    time_elapsed         | 569          |\n",
      "|    total_timesteps      | 145408       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0025893748 |\n",
      "|    clip_fraction        | 0.0146       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.34        |\n",
      "|    explained_variance   | 0.827        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.42e+04     |\n",
      "|    n_updates            | 750          |\n",
      "|    policy_gradient_loss | -0.00254     |\n",
      "|    std                  | 0.918        |\n",
      "|    value_loss           | 1.65e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.3         |\n",
      "|    ep_rew_mean          | -120         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 256          |\n",
      "|    iterations           | 72           |\n",
      "|    time_elapsed         | 575          |\n",
      "|    total_timesteps      | 147456       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0019178002 |\n",
      "|    clip_fraction        | 0.0187       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.33        |\n",
      "|    explained_variance   | 0.718        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 8.85e+03     |\n",
      "|    n_updates            | 760          |\n",
      "|    policy_gradient_loss | -0.00242     |\n",
      "|    std                  | 0.915        |\n",
      "|    value_loss           | 1.18e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.2         |\n",
      "|    ep_rew_mean          | -120         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 257          |\n",
      "|    iterations           | 73           |\n",
      "|    time_elapsed         | 581          |\n",
      "|    total_timesteps      | 149504       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0022217026 |\n",
      "|    clip_fraction        | 0.0153       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.33        |\n",
      "|    explained_variance   | 0.694        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.94e+03     |\n",
      "|    n_updates            | 770          |\n",
      "|    policy_gradient_loss | -0.00326     |\n",
      "|    std                  | 0.911        |\n",
      "|    value_loss           | 1.58e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 15.7         |\n",
      "|    ep_rew_mean          | -170         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 256          |\n",
      "|    iterations           | 74           |\n",
      "|    time_elapsed         | 591          |\n",
      "|    total_timesteps      | 151552       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008721056 |\n",
      "|    clip_fraction        | 0.011        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.32        |\n",
      "|    explained_variance   | 0.7          |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.94e+03     |\n",
      "|    n_updates            | 780          |\n",
      "|    policy_gradient_loss | -0.00233     |\n",
      "|    std                  | 0.91         |\n",
      "|    value_loss           | 1.18e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 18.3         |\n",
      "|    ep_rew_mean          | -120         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 255          |\n",
      "|    iterations           | 75           |\n",
      "|    time_elapsed         | 600          |\n",
      "|    total_timesteps      | 153600       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018673285 |\n",
      "|    clip_fraction        | 0.0123       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.32        |\n",
      "|    explained_variance   | 0.721        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.55e+04     |\n",
      "|    n_updates            | 790          |\n",
      "|    policy_gradient_loss | -0.00172     |\n",
      "|    std                  | 0.906        |\n",
      "|    value_loss           | 1.78e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.2         |\n",
      "|    ep_rew_mean          | -150         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 255          |\n",
      "|    iterations           | 76           |\n",
      "|    time_elapsed         | 609          |\n",
      "|    total_timesteps      | 155648       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014411416 |\n",
      "|    clip_fraction        | 0.03         |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.32        |\n",
      "|    explained_variance   | 0.726        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.05e+03     |\n",
      "|    n_updates            | 800          |\n",
      "|    policy_gradient_loss | 0.001        |\n",
      "|    std                  | 0.898        |\n",
      "|    value_loss           | 1.39e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16           |\n",
      "|    ep_rew_mean          | -170         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 255          |\n",
      "|    iterations           | 77           |\n",
      "|    time_elapsed         | 617          |\n",
      "|    total_timesteps      | 157696       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0014923447 |\n",
      "|    clip_fraction        | 0.0223       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.31        |\n",
      "|    explained_variance   | 0.725        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7.37e+03     |\n",
      "|    n_updates            | 810          |\n",
      "|    policy_gradient_loss | -0.00205     |\n",
      "|    std                  | 0.895        |\n",
      "|    value_loss           | 1.42e+04     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 16         |\n",
      "|    ep_rew_mean          | -100       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 254        |\n",
      "|    iterations           | 78         |\n",
      "|    time_elapsed         | 626        |\n",
      "|    total_timesteps      | 159744     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00500685 |\n",
      "|    clip_fraction        | 0.0472     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.31      |\n",
      "|    explained_variance   | 0.876      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 9.31e+03   |\n",
      "|    n_updates            | 820        |\n",
      "|    policy_gradient_loss | -0.0041    |\n",
      "|    std                  | 0.893      |\n",
      "|    value_loss           | 7.14e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.4      |\n",
      "|    ep_rew_mean          | -60.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 254       |\n",
      "|    iterations           | 79        |\n",
      "|    time_elapsed         | 635       |\n",
      "|    total_timesteps      | 161792    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.0020315 |\n",
      "|    clip_fraction        | 0.0104    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.3      |\n",
      "|    explained_variance   | 0.566     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 5.79e+03  |\n",
      "|    n_updates            | 830       |\n",
      "|    policy_gradient_loss | -0.00273  |\n",
      "|    std                  | 0.888     |\n",
      "|    value_loss           | 1.62e+04  |\n",
      "---------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 14.9         |\n",
      "|    ep_rew_mean          | -140         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 254          |\n",
      "|    iterations           | 80           |\n",
      "|    time_elapsed         | 644          |\n",
      "|    total_timesteps      | 163840       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0041175163 |\n",
      "|    clip_fraction        | 0.034        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.3         |\n",
      "|    explained_variance   | 0.653        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.2e+03      |\n",
      "|    n_updates            | 840          |\n",
      "|    policy_gradient_loss | -0.000336    |\n",
      "|    std                  | 0.885        |\n",
      "|    value_loss           | 1.21e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 17.8        |\n",
      "|    ep_rew_mean          | -90.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 254         |\n",
      "|    iterations           | 81          |\n",
      "|    time_elapsed         | 652         |\n",
      "|    total_timesteps      | 165888      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.000873107 |\n",
      "|    clip_fraction        | 0.00337     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.3        |\n",
      "|    explained_variance   | 0.72        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.32e+03    |\n",
      "|    n_updates            | 850         |\n",
      "|    policy_gradient_loss | -5.75e-05   |\n",
      "|    std                  | 0.883       |\n",
      "|    value_loss           | 1.38e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.8         |\n",
      "|    ep_rew_mean          | -70.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 253          |\n",
      "|    iterations           | 82           |\n",
      "|    time_elapsed         | 661          |\n",
      "|    total_timesteps      | 167936       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013835013 |\n",
      "|    clip_fraction        | 0.0147       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.29        |\n",
      "|    explained_variance   | 0.741        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.13e+03     |\n",
      "|    n_updates            | 860          |\n",
      "|    policy_gradient_loss | -0.00179     |\n",
      "|    std                  | 0.881        |\n",
      "|    value_loss           | 8.3e+03      |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 19.3        |\n",
      "|    ep_rew_mean          | -60.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 253         |\n",
      "|    iterations           | 83          |\n",
      "|    time_elapsed         | 670         |\n",
      "|    total_timesteps      | 169984      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001437954 |\n",
      "|    clip_fraction        | 0.0667      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.3        |\n",
      "|    explained_variance   | 0.738       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.09e+04    |\n",
      "|    n_updates            | 870         |\n",
      "|    policy_gradient_loss | 0.00108     |\n",
      "|    std                  | 0.884       |\n",
      "|    value_loss           | 9.74e+03    |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 19            |\n",
      "|    ep_rew_mean          | -60.4         |\n",
      "| time/                   |               |\n",
      "|    fps                  | 253           |\n",
      "|    iterations           | 84            |\n",
      "|    time_elapsed         | 679           |\n",
      "|    total_timesteps      | 172032        |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00064985093 |\n",
      "|    clip_fraction        | 0.0195        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.29         |\n",
      "|    explained_variance   | 0.345         |\n",
      "|    learning_rate        | 0.0003        |\n",
      "|    loss                 | 1.08e+04      |\n",
      "|    n_updates            | 880           |\n",
      "|    policy_gradient_loss | -0.000976     |\n",
      "|    std                  | 0.881         |\n",
      "|    value_loss           | 1.4e+04       |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.2         |\n",
      "|    ep_rew_mean          | -70.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 252          |\n",
      "|    iterations           | 85           |\n",
      "|    time_elapsed         | 688          |\n",
      "|    total_timesteps      | 174080       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0053197234 |\n",
      "|    clip_fraction        | 0.0357       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.29        |\n",
      "|    explained_variance   | 0.461        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.99e+03     |\n",
      "|    n_updates            | 890          |\n",
      "|    policy_gradient_loss | 0.00136      |\n",
      "|    std                  | 0.878        |\n",
      "|    value_loss           | 1.05e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.2         |\n",
      "|    ep_rew_mean          | -90.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 252          |\n",
      "|    iterations           | 86           |\n",
      "|    time_elapsed         | 697          |\n",
      "|    total_timesteps      | 176128       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0035333573 |\n",
      "|    clip_fraction        | 0.122        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.29        |\n",
      "|    explained_variance   | 0.572        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 978          |\n",
      "|    n_updates            | 900          |\n",
      "|    policy_gradient_loss | 0.00263      |\n",
      "|    std                  | 0.875        |\n",
      "|    value_loss           | 9.16e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.8         |\n",
      "|    ep_rew_mean          | -30.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 252          |\n",
      "|    iterations           | 87           |\n",
      "|    time_elapsed         | 706          |\n",
      "|    total_timesteps      | 178176       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0037357307 |\n",
      "|    clip_fraction        | 0.0227       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.28        |\n",
      "|    explained_variance   | 0.572        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 9.24e+03     |\n",
      "|    n_updates            | 910          |\n",
      "|    policy_gradient_loss | -0.000271    |\n",
      "|    std                  | 0.872        |\n",
      "|    value_loss           | 1.56e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 17.9        |\n",
      "|    ep_rew_mean          | -80.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 252         |\n",
      "|    iterations           | 88          |\n",
      "|    time_elapsed         | 714         |\n",
      "|    total_timesteps      | 180224      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.045134015 |\n",
      "|    clip_fraction        | 0.0889      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.28       |\n",
      "|    explained_variance   | 0.316       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.96e+03    |\n",
      "|    n_updates            | 920         |\n",
      "|    policy_gradient_loss | 0.00512     |\n",
      "|    std                  | 0.866       |\n",
      "|    value_loss           | 5.71e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 18.4        |\n",
      "|    ep_rew_mean          | -60.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 251         |\n",
      "|    iterations           | 89          |\n",
      "|    time_elapsed         | 723         |\n",
      "|    total_timesteps      | 182272      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006506418 |\n",
      "|    clip_fraction        | 0.0244      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.27       |\n",
      "|    explained_variance   | 0.456       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 7.44e+03    |\n",
      "|    n_updates            | 930         |\n",
      "|    policy_gradient_loss | -0.000898   |\n",
      "|    std                  | 0.863       |\n",
      "|    value_loss           | 1.62e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.3         |\n",
      "|    ep_rew_mean          | -50.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 251          |\n",
      "|    iterations           | 90           |\n",
      "|    time_elapsed         | 732          |\n",
      "|    total_timesteps      | 184320       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0019122747 |\n",
      "|    clip_fraction        | 0.0296       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.27        |\n",
      "|    explained_variance   | 0.529        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.73e+03     |\n",
      "|    n_updates            | 940          |\n",
      "|    policy_gradient_loss | -0.00642     |\n",
      "|    std                  | 0.861        |\n",
      "|    value_loss           | 1.26e+04     |\n",
      "------------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 15.2     |\n",
      "|    ep_rew_mean          | -90.4    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 251      |\n",
      "|    iterations           | 91       |\n",
      "|    time_elapsed         | 742      |\n",
      "|    total_timesteps      | 186368   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 3.253553 |\n",
      "|    clip_fraction        | 0.513    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.26    |\n",
      "|    explained_variance   | 0.621    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 7.97e+03 |\n",
      "|    n_updates            | 950      |\n",
      "|    policy_gradient_loss | 0.0546   |\n",
      "|    std                  | 0.855    |\n",
      "|    value_loss           | 1.01e+04 |\n",
      "--------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 14.6     |\n",
      "|    ep_rew_mean          | -210     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 251      |\n",
      "|    iterations           | 92       |\n",
      "|    time_elapsed         | 749      |\n",
      "|    total_timesteps      | 188416   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 4.968008 |\n",
      "|    clip_fraction        | 0.375    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.27    |\n",
      "|    explained_variance   | 0.755    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 2.74e+03 |\n",
      "|    n_updates            | 960      |\n",
      "|    policy_gradient_loss | 0.0901   |\n",
      "|    std                  | 0.866    |\n",
      "|    value_loss           | 1.12e+04 |\n",
      "--------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 14.5         |\n",
      "|    ep_rew_mean          | -140         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 251          |\n",
      "|    iterations           | 93           |\n",
      "|    time_elapsed         | 756          |\n",
      "|    total_timesteps      | 190464       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013399106 |\n",
      "|    clip_fraction        | 0.00596      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.27        |\n",
      "|    explained_variance   | 0.489        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.04e+03     |\n",
      "|    n_updates            | 970          |\n",
      "|    policy_gradient_loss | 0.000527     |\n",
      "|    std                  | 0.865        |\n",
      "|    value_loss           | 1.9e+04      |\n",
      "------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# Train `model` with a budget of 190000 environment steps.\n",
    "# In the saved log each iteration adds 2048 steps, so the run ends at\n",
    "# total_timesteps 190464 (iteration 93), the first multiple of 2048 past the budget.\n",
    "# NOTE(review): the saved log shows approx_kl spiking to 3.25 and 4.97 at\n",
    "# iterations 91-92 (clip_fraction 0.513 / 0.375) - worth checking training stability.\n",
    "# The return value of learn() is rebound to `model`; presumably it is the same\n",
    "# model object - TODO confirm against the stable_baselines3 docs.\n",
    "model = model.learn(total_timesteps=190000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:-119.40 +/- 323.63\n"
     ]
    }
   ],
   "source": [
    "# Evaluate `model` on `env` over 1000 episodes and print the two returned values,\n",
    "# labelled as the mean reward +/- its std, each formatted to two decimals.\n",
    "# NOTE(review): presumably `env` is the same environment object used for training -\n",
    "# confirm whether a separate evaluation env is intended.\n",
    "mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000)\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 15.2     |\n",
      "|    ep_rew_mean     | -220     |\n",
      "| time/              |          |\n",
      "|    fps             | 534      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 3        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 15.6         |\n",
      "|    ep_rew_mean          | -110         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 404          |\n",
      "|    iterations           | 2            |\n",
      "|    time_elapsed         | 10           |\n",
      "|    total_timesteps      | 4096         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0030576033 |\n",
      "|    clip_fraction        | 0.0236       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.27        |\n",
      "|    explained_variance   | 0.888        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.1e+04      |\n",
      "|    n_updates            | 990          |\n",
      "|    policy_gradient_loss | -0.00325     |\n",
      "|    std                  | 0.865        |\n",
      "|    value_loss           | 1.04e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 15.6        |\n",
      "|    ep_rew_mean          | -130        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 365         |\n",
      "|    iterations           | 3           |\n",
      "|    time_elapsed         | 16          |\n",
      "|    total_timesteps      | 6144        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.024720913 |\n",
      "|    clip_fraction        | 0.0546      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.27       |\n",
      "|    explained_variance   | 0.712       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 6.94e+03    |\n",
      "|    n_updates            | 1000        |\n",
      "|    policy_gradient_loss | 0.000731    |\n",
      "|    std                  | 0.864       |\n",
      "|    value_loss           | 9.99e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.4         |\n",
      "|    ep_rew_mean          | -100         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 358          |\n",
      "|    iterations           | 4            |\n",
      "|    time_elapsed         | 22           |\n",
      "|    total_timesteps      | 8192         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0029398692 |\n",
      "|    clip_fraction        | 0.0157       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.27        |\n",
      "|    explained_variance   | 0.869        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.53e+03     |\n",
      "|    n_updates            | 1010         |\n",
      "|    policy_gradient_loss | -0.00233     |\n",
      "|    std                  | 0.864        |\n",
      "|    value_loss           | 6.56e+03     |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 16.8          |\n",
      "|    ep_rew_mean          | -90.4         |\n",
      "| time/                   |               |\n",
      "|    fps                  | 356           |\n",
      "|    iterations           | 5             |\n",
      "|    time_elapsed         | 28            |\n",
      "|    total_timesteps      | 10240         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00095544226 |\n",
      "|    clip_fraction        | 0.0132        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.27         |\n",
      "|    explained_variance   | 0.758         |\n",
      "|    learning_rate        | 0.0003        |\n",
      "|    loss                 | 9.33e+03      |\n",
      "|    n_updates            | 1020          |\n",
      "|    policy_gradient_loss | -0.00222      |\n",
      "|    std                  | 0.863         |\n",
      "|    value_loss           | 1.12e+04      |\n",
      "-------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 15.2         |\n",
      "|    ep_rew_mean          | -180         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 353          |\n",
      "|    iterations           | 6            |\n",
      "|    time_elapsed         | 34           |\n",
      "|    total_timesteps      | 12288        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0027738009 |\n",
      "|    clip_fraction        | 0.0217       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.27        |\n",
      "|    explained_variance   | 0.783        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.73e+03     |\n",
      "|    n_updates            | 1030         |\n",
      "|    policy_gradient_loss | -0.00498     |\n",
      "|    std                  | 0.863        |\n",
      "|    value_loss           | 1.05e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 15          |\n",
      "|    ep_rew_mean          | -170        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 353         |\n",
      "|    iterations           | 7           |\n",
      "|    time_elapsed         | 40          |\n",
      "|    total_timesteps      | 14336       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002988263 |\n",
      "|    clip_fraction        | 0.0153      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.27       |\n",
      "|    explained_variance   | 0.876       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 984         |\n",
      "|    n_updates            | 1040        |\n",
      "|    policy_gradient_loss | -0.00266    |\n",
      "|    std                  | 0.862       |\n",
      "|    value_loss           | 9.23e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.9         |\n",
      "|    ep_rew_mean          | -90.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 351          |\n",
      "|    iterations           | 8            |\n",
      "|    time_elapsed         | 46           |\n",
      "|    total_timesteps      | 16384        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0034718807 |\n",
      "|    clip_fraction        | 0.031        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.27        |\n",
      "|    explained_variance   | 0.779        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.69e+03     |\n",
      "|    n_updates            | 1050         |\n",
      "|    policy_gradient_loss | -0.00165     |\n",
      "|    std                  | 0.862        |\n",
      "|    value_loss           | 1.42e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.5         |\n",
      "|    ep_rew_mean          | -120         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 348          |\n",
      "|    iterations           | 9            |\n",
      "|    time_elapsed         | 52           |\n",
      "|    total_timesteps      | 18432        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0030604494 |\n",
      "|    clip_fraction        | 0.167        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.27        |\n",
      "|    explained_variance   | 0.772        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.27e+03     |\n",
      "|    n_updates            | 1060         |\n",
      "|    policy_gradient_loss | 0.006        |\n",
      "|    std                  | 0.862        |\n",
      "|    value_loss           | 8.43e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.5         |\n",
      "|    ep_rew_mean          | -110         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 346          |\n",
      "|    iterations           | 10           |\n",
      "|    time_elapsed         | 59           |\n",
      "|    total_timesteps      | 20480        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0063196835 |\n",
      "|    clip_fraction        | 0.0394       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.27        |\n",
      "|    explained_variance   | 0.727        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 8.45e+03     |\n",
      "|    n_updates            | 1070         |\n",
      "|    policy_gradient_loss | -0.0047      |\n",
      "|    std                  | 0.861        |\n",
      "|    value_loss           | 1.29e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.3         |\n",
      "|    ep_rew_mean          | -100         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 345          |\n",
      "|    iterations           | 11           |\n",
      "|    time_elapsed         | 65           |\n",
      "|    total_timesteps      | 22528        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0041748555 |\n",
      "|    clip_fraction        | 0.0284       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.27        |\n",
      "|    explained_variance   | 0.619        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 8.13e+03     |\n",
      "|    n_updates            | 1080         |\n",
      "|    policy_gradient_loss | -0.00556     |\n",
      "|    std                  | 0.859        |\n",
      "|    value_loss           | 1.47e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.8         |\n",
      "|    ep_rew_mean          | -120         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 344          |\n",
      "|    iterations           | 12           |\n",
      "|    time_elapsed         | 71           |\n",
      "|    total_timesteps      | 24576        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012770907 |\n",
      "|    clip_fraction        | 0.00464      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.27        |\n",
      "|    explained_variance   | 0.66         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7.7e+03      |\n",
      "|    n_updates            | 1090         |\n",
      "|    policy_gradient_loss | -0.00102     |\n",
      "|    std                  | 0.859        |\n",
      "|    value_loss           | 1.41e+04     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 17         |\n",
      "|    ep_rew_mean          | -70.4      |\n",
      "| time/                   |            |\n",
      "|    fps                  | 343        |\n",
      "|    iterations           | 13         |\n",
      "|    time_elapsed         | 77         |\n",
      "|    total_timesteps      | 26624      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.17376918 |\n",
      "|    clip_fraction        | 0.522      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.26      |\n",
      "|    explained_variance   | 0.722      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 6.18e+03   |\n",
      "|    n_updates            | 1100       |\n",
      "|    policy_gradient_loss | 0.0425     |\n",
      "|    std                  | 0.856      |\n",
      "|    value_loss           | 1.45e+04   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 19         |\n",
      "|    ep_rew_mean          | -50.4      |\n",
      "| time/                   |            |\n",
      "|    fps                  | 342        |\n",
      "|    iterations           | 14         |\n",
      "|    time_elapsed         | 83         |\n",
      "|    total_timesteps      | 28672      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.91415215 |\n",
      "|    clip_fraction        | 0.546      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.26      |\n",
      "|    explained_variance   | 0.63       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 6.37e+03   |\n",
      "|    n_updates            | 1110       |\n",
      "|    policy_gradient_loss | 0.0362     |\n",
      "|    std                  | 0.853      |\n",
      "|    value_loss           | 1.2e+04    |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 17          |\n",
      "|    ep_rew_mean          | -50.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 341         |\n",
      "|    iterations           | 15          |\n",
      "|    time_elapsed         | 89          |\n",
      "|    total_timesteps      | 30720       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.013889194 |\n",
      "|    clip_fraction        | 0.106       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.26       |\n",
      "|    explained_variance   | 0.499       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 6.29e+03    |\n",
      "|    n_updates            | 1120        |\n",
      "|    policy_gradient_loss | 0.0509      |\n",
      "|    std                  | 0.853       |\n",
      "|    value_loss           | 9.59e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 17.1       |\n",
      "|    ep_rew_mean          | -100       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 341        |\n",
      "|    iterations           | 16         |\n",
      "|    time_elapsed         | 96         |\n",
      "|    total_timesteps      | 32768      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.18256924 |\n",
      "|    clip_fraction        | 0.565      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.26      |\n",
      "|    explained_variance   | 0.404      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.6e+03    |\n",
      "|    n_updates            | 1130       |\n",
      "|    policy_gradient_loss | 0.0948     |\n",
      "|    std                  | 0.85       |\n",
      "|    value_loss           | 9.92e+03   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 14.4       |\n",
      "|    ep_rew_mean          | -120       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 340        |\n",
      "|    iterations           | 17         |\n",
      "|    time_elapsed         | 102        |\n",
      "|    total_timesteps      | 34816      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.30766445 |\n",
      "|    clip_fraction        | 0.264      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.26      |\n",
      "|    explained_variance   | 0.615      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 1.13e+04   |\n",
      "|    n_updates            | 1140       |\n",
      "|    policy_gradient_loss | 0.0164     |\n",
      "|    std                  | 0.848      |\n",
      "|    value_loss           | 1.01e+04   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 15.2         |\n",
      "|    ep_rew_mean          | -120         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 340          |\n",
      "|    iterations           | 18           |\n",
      "|    time_elapsed         | 108          |\n",
      "|    total_timesteps      | 36864        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0007013856 |\n",
      "|    clip_fraction        | 0.0128       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.25        |\n",
      "|    explained_variance   | 0.752        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.44e+03     |\n",
      "|    n_updates            | 1150         |\n",
      "|    policy_gradient_loss | -0.000551    |\n",
      "|    std                  | 0.846        |\n",
      "|    value_loss           | 1.03e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.3         |\n",
      "|    ep_rew_mean          | -130         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 339          |\n",
      "|    iterations           | 19           |\n",
      "|    time_elapsed         | 114          |\n",
      "|    total_timesteps      | 38912        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0036195773 |\n",
      "|    clip_fraction        | 0.0123       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.25        |\n",
      "|    explained_variance   | 0.858        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 666          |\n",
      "|    n_updates            | 1160         |\n",
      "|    policy_gradient_loss | -0.00151     |\n",
      "|    std                  | 0.844        |\n",
      "|    value_loss           | 8.54e+03     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 18.2          |\n",
      "|    ep_rew_mean          | -70.4         |\n",
      "| time/                   |               |\n",
      "|    fps                  | 339           |\n",
      "|    iterations           | 20            |\n",
      "|    time_elapsed         | 120           |\n",
      "|    total_timesteps      | 40960         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00032963022 |\n",
      "|    clip_fraction        | 0.00459       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.25         |\n",
      "|    explained_variance   | 0.707         |\n",
      "|    learning_rate        | 0.0003        |\n",
      "|    loss                 | 7.52e+03      |\n",
      "|    n_updates            | 1170          |\n",
      "|    policy_gradient_loss | 1.57e-05      |\n",
      "|    std                  | 0.843         |\n",
      "|    value_loss           | 1.35e+04      |\n",
      "-------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 17.3       |\n",
      "|    ep_rew_mean          | -80.4      |\n",
      "| time/                   |            |\n",
      "|    fps                  | 338        |\n",
      "|    iterations           | 21         |\n",
      "|    time_elapsed         | 126        |\n",
      "|    total_timesteps      | 43008      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.22986487 |\n",
      "|    clip_fraction        | 0.432      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.25      |\n",
      "|    explained_variance   | 0.788      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.03e+03   |\n",
      "|    n_updates            | 1180       |\n",
      "|    policy_gradient_loss | 0.0698     |\n",
      "|    std                  | 0.842      |\n",
      "|    value_loss           | 7.89e+03   |\n",
      "----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 17.5          |\n",
      "|    ep_rew_mean          | -50.4         |\n",
      "| time/                   |               |\n",
      "|    fps                  | 338           |\n",
      "|    iterations           | 22            |\n",
      "|    time_elapsed         | 133           |\n",
      "|    total_timesteps      | 45056         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00043431175 |\n",
      "|    clip_fraction        | 0.00186       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.25         |\n",
      "|    explained_variance   | 0.664         |\n",
      "|    learning_rate        | 0.0003        |\n",
      "|    loss                 | 1.64e+04      |\n",
      "|    n_updates            | 1190          |\n",
      "|    policy_gradient_loss | -0.000212     |\n",
      "|    std                  | 0.841         |\n",
      "|    value_loss           | 1.22e+04      |\n",
      "-------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 17         |\n",
      "|    ep_rew_mean          | -60.4      |\n",
      "| time/                   |            |\n",
      "|    fps                  | 337        |\n",
      "|    iterations           | 23         |\n",
      "|    time_elapsed         | 139        |\n",
      "|    total_timesteps      | 47104      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.07164016 |\n",
      "|    clip_fraction        | 0.498      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.24      |\n",
      "|    explained_variance   | 0.743      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 7.14e+03   |\n",
      "|    n_updates            | 1200       |\n",
      "|    policy_gradient_loss | 0.0415     |\n",
      "|    std                  | 0.832      |\n",
      "|    value_loss           | 5.87e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.3      |\n",
      "|    ep_rew_mean          | -80.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 337       |\n",
      "|    iterations           | 24        |\n",
      "|    time_elapsed         | 145       |\n",
      "|    total_timesteps      | 49152     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.2291499 |\n",
      "|    clip_fraction        | 0.675     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.24     |\n",
      "|    explained_variance   | 0.495     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 4.81e+03  |\n",
      "|    n_updates            | 1210      |\n",
      "|    policy_gradient_loss | 0.0356    |\n",
      "|    std                  | 0.838     |\n",
      "|    value_loss           | 8.72e+03  |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 14.9        |\n",
      "|    ep_rew_mean          | -100        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 337         |\n",
      "|    iterations           | 25          |\n",
      "|    time_elapsed         | 151         |\n",
      "|    total_timesteps      | 51200       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.023686241 |\n",
      "|    clip_fraction        | 0.173       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.24       |\n",
      "|    explained_variance   | 0.779       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 203         |\n",
      "|    n_updates            | 1220        |\n",
      "|    policy_gradient_loss | 0.00941     |\n",
      "|    std                  | 0.837       |\n",
      "|    value_loss           | 4.95e+03    |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 19.1      |\n",
      "|    ep_rew_mean          | -90.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 336       |\n",
      "|    iterations           | 26        |\n",
      "|    time_elapsed         | 158       |\n",
      "|    total_timesteps      | 53248     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 6.4456167 |\n",
      "|    clip_fraction        | 0.528     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.24     |\n",
      "|    explained_variance   | 0.944     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.67e+03  |\n",
      "|    n_updates            | 1230      |\n",
      "|    policy_gradient_loss | 0.0452    |\n",
      "|    std                  | 0.841     |\n",
      "|    value_loss           | 1.93e+03  |\n",
      "---------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 19.8         |\n",
      "|    ep_rew_mean          | -100         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 336          |\n",
      "|    iterations           | 27           |\n",
      "|    time_elapsed         | 164          |\n",
      "|    total_timesteps      | 55296        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0029918335 |\n",
      "|    clip_fraction        | 0.0403       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.25        |\n",
      "|    explained_variance   | 0.743        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 6.01e+03     |\n",
      "|    n_updates            | 1240         |\n",
      "|    policy_gradient_loss | 0.00184      |\n",
      "|    std                  | 0.84         |\n",
      "|    value_loss           | 8.37e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 19.3       |\n",
      "|    ep_rew_mean          | -180       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 336        |\n",
      "|    iterations           | 28         |\n",
      "|    time_elapsed         | 170        |\n",
      "|    total_timesteps      | 57344      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.08371131 |\n",
      "|    clip_fraction        | 0.208      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.25      |\n",
      "|    explained_variance   | 0.55       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.69e+03   |\n",
      "|    n_updates            | 1250       |\n",
      "|    policy_gradient_loss | 0.000333   |\n",
      "|    std                  | 0.841      |\n",
      "|    value_loss           | 1.28e+04   |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 18.9        |\n",
      "|    ep_rew_mean          | -130        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 335         |\n",
      "|    iterations           | 29          |\n",
      "|    time_elapsed         | 176         |\n",
      "|    total_timesteps      | 59392       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.021674696 |\n",
      "|    clip_fraction        | 0.141       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.24       |\n",
      "|    explained_variance   | 0.811       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.54e+03    |\n",
      "|    n_updates            | 1260        |\n",
      "|    policy_gradient_loss | 0.004       |\n",
      "|    std                  | 0.839       |\n",
      "|    value_loss           | 1.11e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 19.9         |\n",
      "|    ep_rew_mean          | -140         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 335          |\n",
      "|    iterations           | 30           |\n",
      "|    time_elapsed         | 183          |\n",
      "|    total_timesteps      | 61440        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008772037 |\n",
      "|    clip_fraction        | 0.00669      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.24        |\n",
      "|    explained_variance   | 0.76         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.38e+03     |\n",
      "|    n_updates            | 1270         |\n",
      "|    policy_gradient_loss | -0.00361     |\n",
      "|    std                  | 0.839        |\n",
      "|    value_loss           | 1.06e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 21.5         |\n",
      "|    ep_rew_mean          | -30.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 335          |\n",
      "|    iterations           | 31           |\n",
      "|    time_elapsed         | 189          |\n",
      "|    total_timesteps      | 63488        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0004612044 |\n",
      "|    clip_fraction        | 0.00166      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.24        |\n",
      "|    explained_variance   | 0.726        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.16e+04     |\n",
      "|    n_updates            | 1280         |\n",
      "|    policy_gradient_loss | -0.00132     |\n",
      "|    std                  | 0.839        |\n",
      "|    value_loss           | 1.19e+04     |\n",
      "------------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.5     |\n",
      "|    ep_rew_mean          | -100     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 335      |\n",
      "|    iterations           | 32       |\n",
      "|    time_elapsed         | 195      |\n",
      "|    total_timesteps      | 65536    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 5.32045  |\n",
      "|    clip_fraction        | 0.61     |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.25    |\n",
      "|    explained_variance   | 0.219    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 1.13e+03 |\n",
      "|    n_updates            | 1290     |\n",
      "|    policy_gradient_loss | 0.0524   |\n",
      "|    std                  | 0.839    |\n",
      "|    value_loss           | 4.81e+03 |\n",
      "--------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 18.5         |\n",
      "|    ep_rew_mean          | -60.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 334          |\n",
      "|    iterations           | 33           |\n",
      "|    time_elapsed         | 201          |\n",
      "|    total_timesteps      | 67584        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0034264608 |\n",
      "|    clip_fraction        | 0.03         |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.24        |\n",
      "|    explained_variance   | 0.541        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.57e+03     |\n",
      "|    n_updates            | 1300         |\n",
      "|    policy_gradient_loss | 0.0026       |\n",
      "|    std                  | 0.838        |\n",
      "|    value_loss           | 1.05e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 20           |\n",
      "|    ep_rew_mean          | -30.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 334          |\n",
      "|    iterations           | 34           |\n",
      "|    time_elapsed         | 208          |\n",
      "|    total_timesteps      | 69632        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0003908361 |\n",
      "|    clip_fraction        | 0.0019       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.24        |\n",
      "|    explained_variance   | 0.314        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.8e+03      |\n",
      "|    n_updates            | 1310         |\n",
      "|    policy_gradient_loss | -0.00103     |\n",
      "|    std                  | 0.838        |\n",
      "|    value_loss           | 1.51e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.5         |\n",
      "|    ep_rew_mean          | -60.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 334          |\n",
      "|    iterations           | 35           |\n",
      "|    time_elapsed         | 214          |\n",
      "|    total_timesteps      | 71680        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0021580146 |\n",
      "|    clip_fraction        | 0.0063       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.24        |\n",
      "|    explained_variance   | 0.074        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 6.59e+03     |\n",
      "|    n_updates            | 1320         |\n",
      "|    policy_gradient_loss | -0.00079     |\n",
      "|    std                  | 0.836        |\n",
      "|    value_loss           | 8.27e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 16.4        |\n",
      "|    ep_rew_mean          | -90.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 333         |\n",
      "|    iterations           | 36          |\n",
      "|    time_elapsed         | 220         |\n",
      "|    total_timesteps      | 73728       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008882189 |\n",
      "|    clip_fraction        | 0.131       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.24       |\n",
      "|    explained_variance   | 0.307       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 5.24e+03    |\n",
      "|    n_updates            | 1330        |\n",
      "|    policy_gradient_loss | 0.00413     |\n",
      "|    std                  | 0.836       |\n",
      "|    value_loss           | 5.56e+03    |\n",
      "-----------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 17.7          |\n",
      "|    ep_rew_mean          | -70.4         |\n",
      "| time/                   |               |\n",
      "|    fps                  | 333           |\n",
      "|    iterations           | 37            |\n",
      "|    time_elapsed         | 226           |\n",
      "|    total_timesteps      | 75776         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00016447331 |\n",
      "|    clip_fraction        | 0.000439      |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.24         |\n",
      "|    explained_variance   | 0.638         |\n",
      "|    learning_rate        | 0.0003        |\n",
      "|    loss                 | 6.01e+03      |\n",
      "|    n_updates            | 1340          |\n",
      "|    policy_gradient_loss | -0.000674     |\n",
      "|    std                  | 0.836         |\n",
      "|    value_loss           | 1.28e+04      |\n",
      "-------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 16.3       |\n",
      "|    ep_rew_mean          | -90.4      |\n",
      "| time/                   |            |\n",
      "|    fps                  | 333        |\n",
      "|    iterations           | 38         |\n",
      "|    time_elapsed         | 233        |\n",
      "|    total_timesteps      | 77824      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00078827 |\n",
      "|    clip_fraction        | 0.00127    |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.24      |\n",
      "|    explained_variance   | 0.488      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 1.7e+03    |\n",
      "|    n_updates            | 1350       |\n",
      "|    policy_gradient_loss | -0.000861  |\n",
      "|    std                  | 0.836      |\n",
      "|    value_loss           | 1.21e+04   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 17.9         |\n",
      "|    ep_rew_mean          | -40.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 333          |\n",
      "|    iterations           | 39           |\n",
      "|    time_elapsed         | 239          |\n",
      "|    total_timesteps      | 79872        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0005934794 |\n",
      "|    clip_fraction        | 0.0104       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.24        |\n",
      "|    explained_variance   | 0.545        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 6.8e+03      |\n",
      "|    n_updates            | 1360         |\n",
      "|    policy_gradient_loss | 0.00031      |\n",
      "|    std                  | 0.835        |\n",
      "|    value_loss           | 1.55e+04     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 17.3       |\n",
      "|    ep_rew_mean          | -20.5      |\n",
      "| time/                   |            |\n",
      "|    fps                  | 333        |\n",
      "|    iterations           | 40         |\n",
      "|    time_elapsed         | 245        |\n",
      "|    total_timesteps      | 81920      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.11999487 |\n",
      "|    clip_fraction        | 0.411      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.24      |\n",
      "|    explained_variance   | 0.394      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 346        |\n",
      "|    n_updates            | 1370       |\n",
      "|    policy_gradient_loss | 0.0383     |\n",
      "|    std                  | 0.841      |\n",
      "|    value_loss           | 8.48e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17        |\n",
      "|    ep_rew_mean          | -50.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 333       |\n",
      "|    iterations           | 41        |\n",
      "|    time_elapsed         | 252       |\n",
      "|    total_timesteps      | 83968     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.4608898 |\n",
      "|    clip_fraction        | 0.327     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.25     |\n",
      "|    explained_variance   | 0.192     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.41e+03  |\n",
      "|    n_updates            | 1380      |\n",
      "|    policy_gradient_loss | 0.0481    |\n",
      "|    std                  | 0.844     |\n",
      "|    value_loss           | 8.37e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.1      |\n",
      "|    ep_rew_mean          | -60.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 332       |\n",
      "|    iterations           | 42        |\n",
      "|    time_elapsed         | 258       |\n",
      "|    total_timesteps      | 86016     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.5332658 |\n",
      "|    clip_fraction        | 0.173     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.25     |\n",
      "|    explained_variance   | 0.301     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.82e+03  |\n",
      "|    n_updates            | 1390      |\n",
      "|    policy_gradient_loss | 0.00893   |\n",
      "|    std                  | 0.842     |\n",
      "|    value_loss           | 1.24e+04  |\n",
      "---------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 16.6          |\n",
      "|    ep_rew_mean          | -20.5         |\n",
      "| time/                   |               |\n",
      "|    fps                  | 332           |\n",
      "|    iterations           | 43            |\n",
      "|    time_elapsed         | 264           |\n",
      "|    total_timesteps      | 88064         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00043170937 |\n",
      "|    clip_fraction        | 0.00771       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.25         |\n",
      "|    explained_variance   | 0.388         |\n",
      "|    learning_rate        | 0.0003        |\n",
      "|    loss                 | 2.38e+03      |\n",
      "|    n_updates            | 1400          |\n",
      "|    policy_gradient_loss | -0.00316      |\n",
      "|    std                  | 0.842         |\n",
      "|    value_loss           | 1.51e+04      |\n",
      "-------------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.1      |\n",
      "|    ep_rew_mean          | -50.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 332       |\n",
      "|    iterations           | 44        |\n",
      "|    time_elapsed         | 271       |\n",
      "|    total_timesteps      | 90112     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 15.683327 |\n",
      "|    clip_fraction        | 0.649     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.25     |\n",
      "|    explained_variance   | 0.347     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.76e+03  |\n",
      "|    n_updates            | 1410      |\n",
      "|    policy_gradient_loss | 0.0294    |\n",
      "|    std                  | 0.849     |\n",
      "|    value_loss           | 5.11e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.2      |\n",
      "|    ep_rew_mean          | -60.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 331       |\n",
      "|    iterations           | 45        |\n",
      "|    time_elapsed         | 278       |\n",
      "|    total_timesteps      | 92160     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 17.898026 |\n",
      "|    clip_fraction        | 0.218     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.26     |\n",
      "|    explained_variance   | 0.239     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.12e+03  |\n",
      "|    n_updates            | 1420      |\n",
      "|    policy_gradient_loss | 0.016     |\n",
      "|    std                  | 0.848     |\n",
      "|    value_loss           | 9.25e+03  |\n",
      "---------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 15.5         |\n",
      "|    ep_rew_mean          | -70.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 330          |\n",
      "|    iterations           | 46           |\n",
      "|    time_elapsed         | 284          |\n",
      "|    total_timesteps      | 94208        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0025878055 |\n",
      "|    clip_fraction        | 0.0459       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.25        |\n",
      "|    explained_variance   | 0.369        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.36e+03     |\n",
      "|    n_updates            | 1430         |\n",
      "|    policy_gradient_loss | -0.00167     |\n",
      "|    std                  | 0.848        |\n",
      "|    value_loss           | 1.24e+04     |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 17.5          |\n",
      "|    ep_rew_mean          | -10.5         |\n",
      "| time/                   |               |\n",
      "|    fps                  | 330           |\n",
      "|    iterations           | 47            |\n",
      "|    time_elapsed         | 291           |\n",
      "|    total_timesteps      | 96256         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00027542253 |\n",
      "|    clip_fraction        | 0.00156       |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | -1.25         |\n",
      "|    explained_variance   | 0.538         |\n",
      "|    learning_rate        | 0.0003        |\n",
      "|    loss                 | 8.63e+03      |\n",
      "|    n_updates            | 1440          |\n",
      "|    policy_gradient_loss | -0.00123      |\n",
      "|    std                  | 0.848         |\n",
      "|    value_loss           | 1.24e+04      |\n",
      "-------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 15.8        |\n",
      "|    ep_rew_mean          | -40.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 330         |\n",
      "|    iterations           | 48          |\n",
      "|    time_elapsed         | 297         |\n",
      "|    total_timesteps      | 98304       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011137854 |\n",
      "|    clip_fraction        | 0.126       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.25       |\n",
      "|    explained_variance   | -0.706      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 63.3        |\n",
      "|    n_updates            | 1450        |\n",
      "|    policy_gradient_loss | 0.01        |\n",
      "|    std                  | 0.832       |\n",
      "|    value_loss           | 3.75e+03    |\n",
      "-----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.9     |\n",
      "|    ep_rew_mean          | -70.4    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 330      |\n",
      "|    iterations           | 49       |\n",
      "|    time_elapsed         | 303      |\n",
      "|    total_timesteps      | 100352   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 9.661301 |\n",
      "|    clip_fraction        | 0.835    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.24    |\n",
      "|    explained_variance   | 0.237    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 3.17e+03 |\n",
      "|    n_updates            | 1460     |\n",
      "|    policy_gradient_loss | 0.0628   |\n",
      "|    std                  | 0.837    |\n",
      "|    value_loss           | 8.55e+03 |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.3     |\n",
      "|    ep_rew_mean          | -50.4    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 330      |\n",
      "|    iterations           | 50       |\n",
      "|    time_elapsed         | 310      |\n",
      "|    total_timesteps      | 102400   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 9.157036 |\n",
      "|    clip_fraction        | 0.466    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.24    |\n",
      "|    explained_variance   | 0.428    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 4.85e+03 |\n",
      "|    n_updates            | 1470     |\n",
      "|    policy_gradient_loss | 0.0243   |\n",
      "|    std                  | 0.836    |\n",
      "|    value_loss           | 1.31e+04 |\n",
      "--------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.2         |\n",
      "|    ep_rew_mean          | -60.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 329          |\n",
      "|    iterations           | 51           |\n",
      "|    time_elapsed         | 316          |\n",
      "|    total_timesteps      | 104448       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0006219037 |\n",
      "|    clip_fraction        | 0.00371      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.24        |\n",
      "|    explained_variance   | 0.357        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.07e+03     |\n",
      "|    n_updates            | 1480         |\n",
      "|    policy_gradient_loss | -0.00143     |\n",
      "|    std                  | 0.835        |\n",
      "|    value_loss           | 1.31e+04     |\n",
      "------------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.3      |\n",
      "|    ep_rew_mean          | -30.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 329       |\n",
      "|    iterations           | 52        |\n",
      "|    time_elapsed         | 323       |\n",
      "|    total_timesteps      | 106496    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3.2060506 |\n",
      "|    clip_fraction        | 0.521     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.24     |\n",
      "|    explained_variance   | 0.455     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.98e+03  |\n",
      "|    n_updates            | 1490      |\n",
      "|    policy_gradient_loss | 0.0786    |\n",
      "|    std                  | 0.834     |\n",
      "|    value_loss           | 1.25e+04  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 16.8       |\n",
      "|    ep_rew_mean          | -40.4      |\n",
      "| time/                   |            |\n",
      "|    fps                  | 327        |\n",
      "|    iterations           | 53         |\n",
      "|    time_elapsed         | 330        |\n",
      "|    total_timesteps      | 108544     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.15811458 |\n",
      "|    clip_fraction        | 0.511      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.24      |\n",
      "|    explained_variance   | 0.499      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 955        |\n",
      "|    n_updates            | 1500       |\n",
      "|    policy_gradient_loss | 0.0203     |\n",
      "|    std                  | 0.839      |\n",
      "|    value_loss           | 5.06e+03   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 16.3         |\n",
      "|    ep_rew_mean          | -30.4        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 327          |\n",
      "|    iterations           | 54           |\n",
      "|    time_elapsed         | 337          |\n",
      "|    total_timesteps      | 110592       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0019242942 |\n",
      "|    clip_fraction        | 0.00928      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.24        |\n",
      "|    explained_variance   | 0.288        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.56e+03     |\n",
      "|    n_updates            | 1510         |\n",
      "|    policy_gradient_loss | -0.002       |\n",
      "|    std                  | 0.839        |\n",
      "|    value_loss           | 9.3e+03      |\n",
      "------------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.8     |\n",
      "|    ep_rew_mean          | -30.4    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 327      |\n",
      "|    iterations           | 55       |\n",
      "|    time_elapsed         | 344      |\n",
      "|    total_timesteps      | 112640   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 38.93719 |\n",
      "|    clip_fraction        | 0.87     |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.24    |\n",
      "|    explained_variance   | 0.233    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 1.33e+04 |\n",
      "|    n_updates            | 1520     |\n",
      "|    policy_gradient_loss | 0.0658   |\n",
      "|    std                  | 0.837    |\n",
      "|    value_loss           | 5.68e+03 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.2      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 327       |\n",
      "|    iterations           | 56        |\n",
      "|    time_elapsed         | 350       |\n",
      "|    total_timesteps      | 114688    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.1586404 |\n",
      "|    clip_fraction        | 0.0574    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.24     |\n",
      "|    explained_variance   | 0.196     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 7.51e+03  |\n",
      "|    n_updates            | 1530      |\n",
      "|    policy_gradient_loss | 0.0165    |\n",
      "|    std                  | 0.838     |\n",
      "|    value_loss           | 9.87e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.2      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 327       |\n",
      "|    iterations           | 57        |\n",
      "|    time_elapsed         | 356       |\n",
      "|    total_timesteps      | 116736    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 23.558586 |\n",
      "|    clip_fraction        | 0.807     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.24     |\n",
      "|    explained_variance   | 0.268     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.04e+03  |\n",
      "|    n_updates            | 1540      |\n",
      "|    policy_gradient_loss | 0.0191    |\n",
      "|    std                  | 0.833     |\n",
      "|    value_loss           | 7.81e+03  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 17.5       |\n",
      "|    ep_rew_mean          | -20.4      |\n",
      "| time/                   |            |\n",
      "|    fps                  | 326        |\n",
      "|    iterations           | 58         |\n",
      "|    time_elapsed         | 363        |\n",
      "|    total_timesteps      | 118784     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.16804847 |\n",
      "|    clip_fraction        | 0.265      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.24      |\n",
      "|    explained_variance   | 0.135      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 0.989      |\n",
      "|    n_updates            | 1550       |\n",
      "|    policy_gradient_loss | 0.0411     |\n",
      "|    std                  | 0.841      |\n",
      "|    value_loss           | 2.91e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 16.5        |\n",
      "|    ep_rew_mean          | -40.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 326         |\n",
      "|    iterations           | 59          |\n",
      "|    time_elapsed         | 369         |\n",
      "|    total_timesteps      | 120832      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.014975655 |\n",
      "|    clip_fraction        | 0.178       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.24       |\n",
      "|    explained_variance   | 0.0651      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 18.7        |\n",
      "|    n_updates            | 1560        |\n",
      "|    policy_gradient_loss | 0.0135      |\n",
      "|    std                  | 0.839       |\n",
      "|    value_loss           | 6.13e+03    |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.4      |\n",
      "|    ep_rew_mean          | -20.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 326       |\n",
      "|    iterations           | 60        |\n",
      "|    time_elapsed         | 375       |\n",
      "|    total_timesteps      | 122880    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.8370298 |\n",
      "|    clip_fraction        | 0.191     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.24     |\n",
      "|    explained_variance   | 0.0564    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.88e+03  |\n",
      "|    n_updates            | 1570      |\n",
      "|    policy_gradient_loss | 0.0162    |\n",
      "|    std                  | 0.836     |\n",
      "|    value_loss           | 1.01e+04  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.2      |\n",
      "|    ep_rew_mean          | -20.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 326       |\n",
      "|    iterations           | 61        |\n",
      "|    time_elapsed         | 382       |\n",
      "|    total_timesteps      | 124928    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 14.486699 |\n",
      "|    clip_fraction        | 0.802     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.24     |\n",
      "|    explained_variance   | 0.0446    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.59e+03  |\n",
      "|    n_updates            | 1580      |\n",
      "|    policy_gradient_loss | 0.0466    |\n",
      "|    std                  | 0.828     |\n",
      "|    value_loss           | 4.52e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.7      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 326       |\n",
      "|    iterations           | 62        |\n",
      "|    time_elapsed         | 388       |\n",
      "|    total_timesteps      | 126976    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.3157317 |\n",
      "|    clip_fraction        | 0.177     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.23     |\n",
      "|    explained_variance   | 0.16      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.09e+03  |\n",
      "|    n_updates            | 1590      |\n",
      "|    policy_gradient_loss | 0.00893   |\n",
      "|    std                  | 0.829     |\n",
      "|    value_loss           | 7.27e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.1      |\n",
      "|    ep_rew_mean          | -20.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 326       |\n",
      "|    iterations           | 63        |\n",
      "|    time_elapsed         | 394       |\n",
      "|    total_timesteps      | 129024    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 75.193275 |\n",
      "|    clip_fraction        | 0.818     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.24     |\n",
      "|    explained_variance   | -0.183    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 13.4      |\n",
      "|    n_updates            | 1600      |\n",
      "|    policy_gradient_loss | 0.0154    |\n",
      "|    std                  | 0.832     |\n",
      "|    value_loss           | 2.43e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 16.5     |\n",
      "|    ep_rew_mean          | -20.5    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 326      |\n",
      "|    iterations           | 64       |\n",
      "|    time_elapsed         | 401      |\n",
      "|    total_timesteps      | 131072   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 71.71587 |\n",
      "|    clip_fraction        | 0.878    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.23    |\n",
      "|    explained_variance   | 0.105    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 6.75e+03 |\n",
      "|    n_updates            | 1610     |\n",
      "|    policy_gradient_loss | 0.0658   |\n",
      "|    std                  | 0.83     |\n",
      "|    value_loss           | 5.3e+03  |\n",
      "--------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 17.8        |\n",
      "|    ep_rew_mean          | -10.5       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 326         |\n",
      "|    iterations           | 65          |\n",
      "|    time_elapsed         | 407         |\n",
      "|    total_timesteps      | 133120      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011046263 |\n",
      "|    clip_fraction        | 0.0539      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.23       |\n",
      "|    explained_variance   | 0.138       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.33e+03    |\n",
      "|    n_updates            | 1620        |\n",
      "|    policy_gradient_loss | 0.0005      |\n",
      "|    std                  | 0.829       |\n",
      "|    value_loss           | 7.13e+03    |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 15.7      |\n",
      "|    ep_rew_mean          | -30.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 326       |\n",
      "|    iterations           | 66        |\n",
      "|    time_elapsed         | 414       |\n",
      "|    total_timesteps      | 135168    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 844.36743 |\n",
      "|    clip_fraction        | 0.829     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.21     |\n",
      "|    explained_variance   | 0.0807    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.22e+03  |\n",
      "|    n_updates            | 1630      |\n",
      "|    policy_gradient_loss | 0.0271    |\n",
      "|    std                  | 0.792     |\n",
      "|    value_loss           | 2.79e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 19.9     |\n",
      "|    ep_rew_mean          | -10.4    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 326      |\n",
      "|    iterations           | 67       |\n",
      "|    time_elapsed         | 420      |\n",
      "|    total_timesteps      | 137216   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 963.8563 |\n",
      "|    clip_fraction        | 0.501    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.19    |\n",
      "|    explained_variance   | 0.0396   |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 6.4e+03  |\n",
      "|    n_updates            | 1640     |\n",
      "|    policy_gradient_loss | 0.069    |\n",
      "|    std                  | 0.792    |\n",
      "|    value_loss           | 9.59e+03 |\n",
      "--------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17        |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 326       |\n",
      "|    iterations           | 68        |\n",
      "|    time_elapsed         | 426       |\n",
      "|    total_timesteps      | 139264    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 293.87463 |\n",
      "|    clip_fraction        | 0.786     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.18     |\n",
      "|    explained_variance   | -0.11     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.64      |\n",
      "|    n_updates            | 1650      |\n",
      "|    policy_gradient_loss | 0.00663   |\n",
      "|    std                  | 0.781     |\n",
      "|    value_loss           | 3.14e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.3      |\n",
      "|    ep_rew_mean          | -20.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 326       |\n",
      "|    iterations           | 69        |\n",
      "|    time_elapsed         | 433       |\n",
      "|    total_timesteps      | 141312    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 309.51773 |\n",
      "|    clip_fraction        | 0.873     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.17     |\n",
      "|    explained_variance   | 0.0517    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.23e+04  |\n",
      "|    n_updates            | 1660      |\n",
      "|    policy_gradient_loss | 0.0487    |\n",
      "|    std                  | 0.782     |\n",
      "|    value_loss           | 5.82e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.1      |\n",
      "|    ep_rew_mean          | -0.461    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 326       |\n",
      "|    iterations           | 70        |\n",
      "|    time_elapsed         | 439       |\n",
      "|    total_timesteps      | 143360    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 174.18985 |\n",
      "|    clip_fraction        | 0.899     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.17     |\n",
      "|    explained_variance   | 0.0619    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.87e+03  |\n",
      "|    n_updates            | 1670      |\n",
      "|    policy_gradient_loss | 0.0595    |\n",
      "|    std                  | 0.783     |\n",
      "|    value_loss           | 5.91e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.8      |\n",
      "|    ep_rew_mean          | -40.4     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 325       |\n",
      "|    iterations           | 71        |\n",
      "|    time_elapsed         | 446       |\n",
      "|    total_timesteps      | 145408    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1129.9675 |\n",
      "|    clip_fraction        | 0.907     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.18     |\n",
      "|    explained_variance   | -11.4     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.834     |\n",
      "|    n_updates            | 1680      |\n",
      "|    policy_gradient_loss | 0.0274    |\n",
      "|    std                  | 0.798     |\n",
      "|    value_loss           | 16.3      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.2      |\n",
      "|    ep_rew_mean          | -0.463    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 325       |\n",
      "|    iterations           | 72        |\n",
      "|    time_elapsed         | 452       |\n",
      "|    total_timesteps      | 147456    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 14.256566 |\n",
      "|    clip_fraction        | 0.757     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.2      |\n",
      "|    explained_variance   | 0.0384    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.05e+03  |\n",
      "|    n_updates            | 1690      |\n",
      "|    policy_gradient_loss | 0.0814    |\n",
      "|    std                  | 0.8       |\n",
      "|    value_loss           | 1.21e+04  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.9     |\n",
      "|    ep_rew_mean          | -10.5    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 325      |\n",
      "|    iterations           | 73       |\n",
      "|    time_elapsed         | 458      |\n",
      "|    total_timesteps      | 149504   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 602.2821 |\n",
      "|    clip_fraction        | 0.855    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.2     |\n",
      "|    explained_variance   | -10.1    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 2.93     |\n",
      "|    n_updates            | 1700     |\n",
      "|    policy_gradient_loss | -0.0276  |\n",
      "|    std                  | 0.806    |\n",
      "|    value_loss           | 52.1     |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.3      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 325       |\n",
      "|    iterations           | 74        |\n",
      "|    time_elapsed         | 465       |\n",
      "|    total_timesteps      | 151552    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 872.70215 |\n",
      "|    clip_fraction        | 0.883     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.2      |\n",
      "|    explained_variance   | 0.0524    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1         |\n",
      "|    n_updates            | 1710      |\n",
      "|    policy_gradient_loss | 0.0312    |\n",
      "|    std                  | 0.801     |\n",
      "|    value_loss           | 3.08e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.7      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 325       |\n",
      "|    iterations           | 75        |\n",
      "|    time_elapsed         | 471       |\n",
      "|    total_timesteps      | 153600    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1191.7906 |\n",
      "|    clip_fraction        | 0.947     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.21     |\n",
      "|    explained_variance   | 0.0103    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.557     |\n",
      "|    n_updates            | 1720      |\n",
      "|    policy_gradient_loss | 0.127     |\n",
      "|    std                  | 0.823     |\n",
      "|    value_loss           | 2.95e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 16.9     |\n",
      "|    ep_rew_mean          | -20.5    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 325      |\n",
      "|    iterations           | 76       |\n",
      "|    time_elapsed         | 477      |\n",
      "|    total_timesteps      | 155648   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 6188.624 |\n",
      "|    clip_fraction        | 0.956    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.21    |\n",
      "|    explained_variance   | 0.0144   |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.515    |\n",
      "|    n_updates            | 1730     |\n",
      "|    policy_gradient_loss | 0.0593   |\n",
      "|    std                  | 0.802    |\n",
      "|    value_loss           | 3.11e+03 |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 16.7     |\n",
      "|    ep_rew_mean          | -0.462   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 325      |\n",
      "|    iterations           | 77       |\n",
      "|    time_elapsed         | 484      |\n",
      "|    total_timesteps      | 157696   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 47.23161 |\n",
      "|    clip_fraction        | 0.927    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.2     |\n",
      "|    explained_variance   | 0.0104   |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 26.9     |\n",
      "|    n_updates            | 1740     |\n",
      "|    policy_gradient_loss | 0.112    |\n",
      "|    std                  | 0.8      |\n",
      "|    value_loss           | 5.15e+03 |\n",
      "--------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.3      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 325       |\n",
      "|    iterations           | 78        |\n",
      "|    time_elapsed         | 490       |\n",
      "|    total_timesteps      | 159744    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 4219.3535 |\n",
      "|    clip_fraction        | 0.962     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.2      |\n",
      "|    explained_variance   | -17.5     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0329    |\n",
      "|    n_updates            | 1750      |\n",
      "|    policy_gradient_loss | -0.0118   |\n",
      "|    std                  | 0.802     |\n",
      "|    value_loss           | 0.793     |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 15.9      |\n",
      "|    ep_rew_mean          | -0.464    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 325       |\n",
      "|    iterations           | 79        |\n",
      "|    time_elapsed         | 497       |\n",
      "|    total_timesteps      | 161792    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3562.6875 |\n",
      "|    clip_fraction        | 0.946     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.19     |\n",
      "|    explained_variance   | 0.00815   |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 4.76e+03  |\n",
      "|    n_updates            | 1760      |\n",
      "|    policy_gradient_loss | 0.0523    |\n",
      "|    std                  | 0.794     |\n",
      "|    value_loss           | 3.35e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.8     |\n",
      "|    ep_rew_mean          | -30.4    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 325      |\n",
      "|    iterations           | 80       |\n",
      "|    time_elapsed         | 503      |\n",
      "|    total_timesteps      | 163840   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 4090.773 |\n",
      "|    clip_fraction        | 0.968    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.18    |\n",
      "|    explained_variance   | -13.1    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.0774   |\n",
      "|    n_updates            | 1770     |\n",
      "|    policy_gradient_loss | 0.0952   |\n",
      "|    std                  | 0.768    |\n",
      "|    value_loss           | 0.0692   |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.6      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 325       |\n",
      "|    iterations           | 81        |\n",
      "|    time_elapsed         | 510       |\n",
      "|    total_timesteps      | 165888    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 483.10657 |\n",
      "|    clip_fraction        | 0.84      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.15     |\n",
      "|    explained_variance   | 0.00149   |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.91e+03  |\n",
      "|    n_updates            | 1780      |\n",
      "|    policy_gradient_loss | 0.108     |\n",
      "|    std                  | 0.768     |\n",
      "|    value_loss           | 7.44e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.6      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 325       |\n",
      "|    iterations           | 82        |\n",
      "|    time_elapsed         | 516       |\n",
      "|    total_timesteps      | 167936    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 374.41458 |\n",
      "|    clip_fraction        | 0.889     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.15     |\n",
      "|    explained_variance   | -12.1     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 4.31      |\n",
      "|    n_updates            | 1790      |\n",
      "|    policy_gradient_loss | -0.0318   |\n",
      "|    std                  | 0.763     |\n",
      "|    value_loss           | 79.9      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.6      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 325       |\n",
      "|    iterations           | 83        |\n",
      "|    time_elapsed         | 522       |\n",
      "|    total_timesteps      | 169984    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1691.5073 |\n",
      "|    clip_fraction        | 0.93      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.15     |\n",
      "|    explained_variance   | -20.1     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.342     |\n",
      "|    n_updates            | 1800      |\n",
      "|    policy_gradient_loss | 0.0189    |\n",
      "|    std                  | 0.764     |\n",
      "|    value_loss           | 4.07      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 15.7      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 325       |\n",
      "|    iterations           | 84        |\n",
      "|    time_elapsed         | 529       |\n",
      "|    total_timesteps      | 172032    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1142.4961 |\n",
      "|    clip_fraction        | 0.895     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.15     |\n",
      "|    explained_variance   | 0.00666   |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 9.8e+03   |\n",
      "|    n_updates            | 1810      |\n",
      "|    policy_gradient_loss | 0.0713    |\n",
      "|    std                  | 0.767     |\n",
      "|    value_loss           | 3.23e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.6      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 324       |\n",
      "|    iterations           | 85        |\n",
      "|    time_elapsed         | 535       |\n",
      "|    total_timesteps      | 174080    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3380.8218 |\n",
      "|    clip_fraction        | 0.95      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.16     |\n",
      "|    explained_variance   | 0.00375   |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.335     |\n",
      "|    n_updates            | 1820      |\n",
      "|    policy_gradient_loss | 0.173     |\n",
      "|    std                  | 0.772     |\n",
      "|    value_loss           | 3.34e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 16.4     |\n",
      "|    ep_rew_mean          | -10.5    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 324      |\n",
      "|    iterations           | 86       |\n",
      "|    time_elapsed         | 542      |\n",
      "|    total_timesteps      | 176128   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 1638.118 |\n",
      "|    clip_fraction        | 0.95     |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.16    |\n",
      "|    explained_variance   | 0.0138   |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 3.74e+03 |\n",
      "|    n_updates            | 1830     |\n",
      "|    policy_gradient_loss | 0.102    |\n",
      "|    std                  | 0.772    |\n",
      "|    value_loss           | 3.34e+03 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.8      |\n",
      "|    ep_rew_mean          | -0.456    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 324       |\n",
      "|    iterations           | 87        |\n",
      "|    time_elapsed         | 548       |\n",
      "|    total_timesteps      | 178176    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3933.5044 |\n",
      "|    clip_fraction        | 0.947     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.16     |\n",
      "|    explained_variance   | 0.00995   |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | -0.0466   |\n",
      "|    n_updates            | 1840      |\n",
      "|    policy_gradient_loss | 0.0364    |\n",
      "|    std                  | 0.774     |\n",
      "|    value_loss           | 2.19e+03  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.6      |\n",
      "|    ep_rew_mean          | -0.462    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 324       |\n",
      "|    iterations           | 88        |\n",
      "|    time_elapsed         | 554       |\n",
      "|    total_timesteps      | 180224    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 386.01083 |\n",
      "|    clip_fraction        | 0.911     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.15     |\n",
      "|    explained_variance   | -22.1     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0677    |\n",
      "|    n_updates            | 1850      |\n",
      "|    policy_gradient_loss | 0.0891    |\n",
      "|    std                  | 0.754     |\n",
      "|    value_loss           | 0.029     |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.5      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 324       |\n",
      "|    iterations           | 89        |\n",
      "|    time_elapsed         | 561       |\n",
      "|    total_timesteps      | 182272    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1157.6632 |\n",
      "|    clip_fraction        | 0.936     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.14     |\n",
      "|    explained_variance   | -4.92     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.231     |\n",
      "|    n_updates            | 1860      |\n",
      "|    policy_gradient_loss | 0.183     |\n",
      "|    std                  | 0.76      |\n",
      "|    value_loss           | 0.0111    |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17        |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 324       |\n",
      "|    iterations           | 90        |\n",
      "|    time_elapsed         | 567       |\n",
      "|    total_timesteps      | 184320    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 157.91489 |\n",
      "|    clip_fraction        | 0.876     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.16     |\n",
      "|    explained_variance   | -1.06     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.258     |\n",
      "|    n_updates            | 1870      |\n",
      "|    policy_gradient_loss | 0.181     |\n",
      "|    std                  | 0.778     |\n",
      "|    value_loss           | 0.0146    |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.7     |\n",
      "|    ep_rew_mean          | -10.5    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 324      |\n",
      "|    iterations           | 91       |\n",
      "|    time_elapsed         | 573      |\n",
      "|    total_timesteps      | 186368   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 35.31128 |\n",
      "|    clip_fraction        | 0.83     |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.17    |\n",
      "|    explained_variance   | 4.48e-05 |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.158    |\n",
      "|    n_updates            | 1880     |\n",
      "|    policy_gradient_loss | 0.0923   |\n",
      "|    std                  | 0.788    |\n",
      "|    value_loss           | 2.43e+03 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.6      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 324       |\n",
      "|    iterations           | 92        |\n",
      "|    time_elapsed         | 580       |\n",
      "|    total_timesteps      | 188416    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 295.64343 |\n",
      "|    clip_fraction        | 0.872     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.18     |\n",
      "|    explained_variance   | 0.00594   |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.66e+03  |\n",
      "|    n_updates            | 1890      |\n",
      "|    policy_gradient_loss | 0.0645    |\n",
      "|    std                  | 0.786     |\n",
      "|    value_loss           | 5.7e+03   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.5      |\n",
      "|    ep_rew_mean          | -0.462    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 324       |\n",
      "|    iterations           | 93        |\n",
      "|    time_elapsed         | 586       |\n",
      "|    total_timesteps      | 190464    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 463.89523 |\n",
      "|    clip_fraction        | 0.905     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.18     |\n",
      "|    explained_variance   | -14.3     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.195     |\n",
      "|    n_updates            | 1900      |\n",
      "|    policy_gradient_loss | 0.2       |\n",
      "|    std                  | 0.784     |\n",
      "|    value_loss           | 1.13      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.2      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 324       |\n",
      "|    iterations           | 94        |\n",
      "|    time_elapsed         | 593       |\n",
      "|    total_timesteps      | 192512    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1505.3654 |\n",
      "|    clip_fraction        | 0.927     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.16     |\n",
      "|    explained_variance   | -1.57     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | -0.00149  |\n",
      "|    n_updates            | 1910      |\n",
      "|    policy_gradient_loss | 0.0436    |\n",
      "|    std                  | 0.765     |\n",
      "|    value_loss           | 0.0516    |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.3      |\n",
      "|    ep_rew_mean          | -0.463    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 324       |\n",
      "|    iterations           | 95        |\n",
      "|    time_elapsed         | 599       |\n",
      "|    total_timesteps      | 194560    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 170.59114 |\n",
      "|    clip_fraction        | 0.895     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.15     |\n",
      "|    explained_variance   | 0.000851  |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0663    |\n",
      "|    n_updates            | 1920      |\n",
      "|    policy_gradient_loss | 0.0902    |\n",
      "|    std                  | 0.759     |\n",
      "|    value_loss           | 2.19e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.4      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 324       |\n",
      "|    iterations           | 96        |\n",
      "|    time_elapsed         | 606       |\n",
      "|    total_timesteps      | 196608    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 364.13354 |\n",
      "|    clip_fraction        | 0.926     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.15     |\n",
      "|    explained_variance   | -19       |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.175     |\n",
      "|    n_updates            | 1930      |\n",
      "|    policy_gradient_loss | 0.0781    |\n",
      "|    std                  | 0.766     |\n",
      "|    value_loss           | 0.0163    |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17        |\n",
      "|    ep_rew_mean          | -0.461    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 324       |\n",
      "|    iterations           | 97        |\n",
      "|    time_elapsed         | 612       |\n",
      "|    total_timesteps      | 198656    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 311.31897 |\n",
      "|    clip_fraction        | 0.916     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.14     |\n",
      "|    explained_variance   | -8.24     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.148     |\n",
      "|    n_updates            | 1940      |\n",
      "|    policy_gradient_loss | 0.0233    |\n",
      "|    std                  | 0.751     |\n",
      "|    value_loss           | 0.00732   |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.8      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 324       |\n",
      "|    iterations           | 98        |\n",
      "|    time_elapsed         | 619       |\n",
      "|    total_timesteps      | 200704    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 10.321596 |\n",
      "|    clip_fraction        | 0.848     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.13     |\n",
      "|    explained_variance   | -0.549    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0424    |\n",
      "|    n_updates            | 1950      |\n",
      "|    policy_gradient_loss | 0.112     |\n",
      "|    std                  | 0.752     |\n",
      "|    value_loss           | 0.00454   |\n",
      "---------------------------------------\n"
     ]
    }
   ],
   "source": [
    "model = model.learn(total_timesteps=200000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:-0.46 +/- 0.02\n"
     ]
    }
   ],
   "source": [
    "# Score the trained policy over 1000 evaluation episodes on `env` and\n",
    "# report the mean and standard deviation of the episode reward.\n",
    "mean_reward, std_reward = evaluate_policy(\n",
    "    model,\n",
    "    env,\n",
    "    n_eval_episodes=1000,\n",
    ")\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the trained model under model_backup/ so it can be reloaded later.\n",
    "model.save(path=\"model_backup/ppo_acc_bigger_200000_steps_new\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 16.3     |\n",
      "|    ep_rew_mean     | -10.5    |\n",
      "| time/              |          |\n",
      "|    fps             | 463      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 4        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.1      |\n",
      "|    ep_rew_mean          | -0.458    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 356       |\n",
      "|    iterations           | 2         |\n",
      "|    time_elapsed         | 11        |\n",
      "|    total_timesteps      | 4096      |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 35.81743  |\n",
      "|    clip_fraction        | 0.844     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.13     |\n",
      "|    explained_variance   | -8.74e-05 |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.439     |\n",
      "|    n_updates            | 1970      |\n",
      "|    policy_gradient_loss | 0.126     |\n",
      "|    std                  | 0.746     |\n",
      "|    value_loss           | 3.44e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17       |\n",
      "|    ep_rew_mean          | -0.461   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 332      |\n",
      "|    iterations           | 3        |\n",
      "|    time_elapsed         | 18       |\n",
      "|    total_timesteps      | 6144     |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 795.7029 |\n",
      "|    clip_fraction        | 0.901    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.1     |\n",
      "|    explained_variance   | -22.1    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | -0.0353  |\n",
      "|    n_updates            | 1980     |\n",
      "|    policy_gradient_loss | -0.0376  |\n",
      "|    std                  | 0.704    |\n",
      "|    value_loss           | 0.0514   |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.5     |\n",
      "|    ep_rew_mean          | -0.46    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 322      |\n",
      "|    iterations           | 4        |\n",
      "|    time_elapsed         | 25       |\n",
      "|    total_timesteps      | 8192     |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 3186.998 |\n",
      "|    clip_fraction        | 0.938    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.04    |\n",
      "|    explained_variance   | -5.54    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | -0.0107  |\n",
      "|    n_updates            | 1990     |\n",
      "|    policy_gradient_loss | -0.00555 |\n",
      "|    std                  | 0.665    |\n",
      "|    value_loss           | 0.00554  |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.4      |\n",
      "|    ep_rew_mean          | -0.462    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 317       |\n",
      "|    iterations           | 5         |\n",
      "|    time_elapsed         | 32        |\n",
      "|    total_timesteps      | 10240     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 137.69203 |\n",
      "|    clip_fraction        | 0.877     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.01     |\n",
      "|    explained_variance   | -0.344    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0608    |\n",
      "|    n_updates            | 2000      |\n",
      "|    policy_gradient_loss | 0.0713    |\n",
      "|    std                  | 0.66      |\n",
      "|    value_loss           | 0.00152   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.9      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 314       |\n",
      "|    iterations           | 6         |\n",
      "|    time_elapsed         | 39        |\n",
      "|    total_timesteps      | 12288     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3141.9854 |\n",
      "|    clip_fraction        | 0.908     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.01     |\n",
      "|    explained_variance   | 0.597     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.173     |\n",
      "|    n_updates            | 2010      |\n",
      "|    policy_gradient_loss | 0.189     |\n",
      "|    std                  | 0.667     |\n",
      "|    value_loss           | 0.00274   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.3      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 313       |\n",
      "|    iterations           | 7         |\n",
      "|    time_elapsed         | 45        |\n",
      "|    total_timesteps      | 14336     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 27.398006 |\n",
      "|    clip_fraction        | 0.84      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.02     |\n",
      "|    explained_variance   | 0.74      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.136     |\n",
      "|    n_updates            | 2020      |\n",
      "|    policy_gradient_loss | 0.138     |\n",
      "|    std                  | 0.671     |\n",
      "|    value_loss           | 0.00101   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.8      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 312       |\n",
      "|    iterations           | 8         |\n",
      "|    time_elapsed         | 52        |\n",
      "|    total_timesteps      | 16384     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 181.49194 |\n",
      "|    clip_fraction        | 0.885     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.02     |\n",
      "|    explained_variance   | 0.872     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.259     |\n",
      "|    n_updates            | 2030      |\n",
      "|    policy_gradient_loss | 0.156     |\n",
      "|    std                  | 0.673     |\n",
      "|    value_loss           | 0.00302   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.1      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 310       |\n",
      "|    iterations           | 9         |\n",
      "|    time_elapsed         | 59        |\n",
      "|    total_timesteps      | 18432     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 76.225365 |\n",
      "|    clip_fraction        | 0.859     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.02     |\n",
      "|    explained_variance   | -0.000228 |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.677     |\n",
      "|    n_updates            | 2040      |\n",
      "|    policy_gradient_loss | 0.156     |\n",
      "|    std                  | 0.673     |\n",
      "|    value_loss           | 2.98e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.6      |\n",
      "|    ep_rew_mean          | -0.457    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 309       |\n",
      "|    iterations           | 10        |\n",
      "|    time_elapsed         | 66        |\n",
      "|    total_timesteps      | 20480     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2743.9148 |\n",
      "|    clip_fraction        | 0.92      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.02     |\n",
      "|    explained_variance   | 0.00578   |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.123     |\n",
      "|    n_updates            | 2050      |\n",
      "|    policy_gradient_loss | 0.0901    |\n",
      "|    std                  | 0.658     |\n",
      "|    value_loss           | 2.43e+03  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.1     |\n",
      "|    ep_rew_mean          | -0.461   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 309      |\n",
      "|    iterations           | 11       |\n",
      "|    time_elapsed         | 72       |\n",
      "|    total_timesteps      | 22528    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 7701.021 |\n",
      "|    clip_fraction        | 0.919    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.984   |\n",
      "|    explained_variance   | -14.4    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.0285   |\n",
      "|    n_updates            | 2060     |\n",
      "|    policy_gradient_loss | 0.0728   |\n",
      "|    std                  | 0.638    |\n",
      "|    value_loss           | 0.0165   |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.3      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 307       |\n",
      "|    iterations           | 12        |\n",
      "|    time_elapsed         | 79        |\n",
      "|    total_timesteps      | 24576     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1232.4514 |\n",
      "|    clip_fraction        | 0.907     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.958    |\n",
      "|    explained_variance   | -34.5     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | -0.0181   |\n",
      "|    n_updates            | 2070      |\n",
      "|    policy_gradient_loss | 0.0569    |\n",
      "|    std                  | 0.623     |\n",
      "|    value_loss           | 0.00295   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.6      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 305       |\n",
      "|    iterations           | 13        |\n",
      "|    time_elapsed         | 87        |\n",
      "|    total_timesteps      | 26624     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 23.292542 |\n",
      "|    clip_fraction        | 0.883     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.944    |\n",
      "|    explained_variance   | -0.604    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | -0.0105   |\n",
      "|    n_updates            | 2080      |\n",
      "|    policy_gradient_loss | 0.112     |\n",
      "|    std                  | 0.623     |\n",
      "|    value_loss           | 0.00223   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18        |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 301       |\n",
      "|    iterations           | 14        |\n",
      "|    time_elapsed         | 95        |\n",
      "|    total_timesteps      | 28672     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 46.402294 |\n",
      "|    clip_fraction        | 0.856     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.938    |\n",
      "|    explained_variance   | -0.000149 |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.27e+03  |\n",
      "|    n_updates            | 2090      |\n",
      "|    policy_gradient_loss | 0.109     |\n",
      "|    std                  | 0.613     |\n",
      "|    value_loss           | 3.46e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.8      |\n",
      "|    ep_rew_mean          | -0.457    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 295       |\n",
      "|    iterations           | 15        |\n",
      "|    time_elapsed         | 103       |\n",
      "|    total_timesteps      | 30720     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3577.1187 |\n",
      "|    clip_fraction        | 0.909     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.923    |\n",
      "|    explained_variance   | 0.00283   |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0943    |\n",
      "|    n_updates            | 2100      |\n",
      "|    policy_gradient_loss | 1.1       |\n",
      "|    std                  | 0.604     |\n",
      "|    value_loss           | 2.2e+03   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.2      |\n",
      "|    ep_rew_mean          | -0.463    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 294       |\n",
      "|    iterations           | 16        |\n",
      "|    time_elapsed         | 111       |\n",
      "|    total_timesteps      | 32768     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1132.5166 |\n",
      "|    clip_fraction        | 0.865     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.901    |\n",
      "|    explained_variance   | -13.2     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | -0.0416   |\n",
      "|    n_updates            | 2110      |\n",
      "|    policy_gradient_loss | -0.0254   |\n",
      "|    std                  | 0.584     |\n",
      "|    value_loss           | 0.00375   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.8      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 292       |\n",
      "|    iterations           | 17        |\n",
      "|    time_elapsed         | 119       |\n",
      "|    total_timesteps      | 34816     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 6665.0127 |\n",
      "|    clip_fraction        | 0.929     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.867    |\n",
      "|    explained_variance   | -0.898    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0131    |\n",
      "|    n_updates            | 2120      |\n",
      "|    policy_gradient_loss | 0.0286    |\n",
      "|    std                  | 0.566     |\n",
      "|    value_loss           | 0.00157   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.9      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 291       |\n",
      "|    iterations           | 18        |\n",
      "|    time_elapsed         | 126       |\n",
      "|    total_timesteps      | 36864     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 10006.409 |\n",
      "|    clip_fraction        | 0.921     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.838    |\n",
      "|    explained_variance   | 0.0929    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.227     |\n",
      "|    n_updates            | 2130      |\n",
      "|    policy_gradient_loss | 0.237     |\n",
      "|    std                  | 0.55      |\n",
      "|    value_loss           | 0.00112   |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.6     |\n",
      "|    ep_rew_mean          | -0.46    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 290      |\n",
      "|    iterations           | 19       |\n",
      "|    time_elapsed         | 134      |\n",
      "|    total_timesteps      | 38912    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 90.16227 |\n",
      "|    clip_fraction        | 0.866    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.824   |\n",
      "|    explained_variance   | 0.76     |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.189    |\n",
      "|    n_updates            | 2140     |\n",
      "|    policy_gradient_loss | 0.203    |\n",
      "|    std                  | 0.553    |\n",
      "|    value_loss           | 0.000351 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.7      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 286       |\n",
      "|    iterations           | 20        |\n",
      "|    time_elapsed         | 142       |\n",
      "|    total_timesteps      | 40960     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3114.5635 |\n",
      "|    clip_fraction        | 0.914     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.83     |\n",
      "|    explained_variance   | 0.921     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0984    |\n",
      "|    n_updates            | 2150      |\n",
      "|    policy_gradient_loss | 0.146     |\n",
      "|    std                  | 0.553     |\n",
      "|    value_loss           | 0.00116   |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 16.9     |\n",
      "|    ep_rew_mean          | -0.461   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 284      |\n",
      "|    iterations           | 21       |\n",
      "|    time_elapsed         | 151      |\n",
      "|    total_timesteps      | 43008    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 4744.28  |\n",
      "|    clip_fraction        | 0.921    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.841   |\n",
      "|    explained_variance   | 0.787    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.292    |\n",
      "|    n_updates            | 2160     |\n",
      "|    policy_gradient_loss | 0.311    |\n",
      "|    std                  | 0.573    |\n",
      "|    value_loss           | 0.00205  |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 18       |\n",
      "|    ep_rew_mean          | -0.459   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 282      |\n",
      "|    iterations           | 22       |\n",
      "|    time_elapsed         | 159      |\n",
      "|    total_timesteps      | 45056    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 43.23597 |\n",
      "|    clip_fraction        | 0.861    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.871   |\n",
      "|    explained_variance   | 0.9      |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.227    |\n",
      "|    n_updates            | 2170     |\n",
      "|    policy_gradient_loss | 0.18     |\n",
      "|    std                  | 0.582    |\n",
      "|    value_loss           | 0.000207 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.8      |\n",
      "|    ep_rew_mean          | -0.457    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 279       |\n",
      "|    iterations           | 23        |\n",
      "|    time_elapsed         | 168       |\n",
      "|    total_timesteps      | 47104     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1237.6472 |\n",
      "|    clip_fraction        | 0.892     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.877    |\n",
      "|    explained_variance   | 0.91      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | -0.0211   |\n",
      "|    n_updates            | 2180      |\n",
      "|    policy_gradient_loss | 0.15      |\n",
      "|    std                  | 0.58      |\n",
      "|    value_loss           | 0.000255  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 19.2      |\n",
      "|    ep_rew_mean          | -0.456    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 278       |\n",
      "|    iterations           | 24        |\n",
      "|    time_elapsed         | 176       |\n",
      "|    total_timesteps      | 49152     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 703.56604 |\n",
      "|    clip_fraction        | 0.913     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.877    |\n",
      "|    explained_variance   | 0.953     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.274     |\n",
      "|    n_updates            | 2190      |\n",
      "|    policy_gradient_loss | 0.176     |\n",
      "|    std                  | 0.585     |\n",
      "|    value_loss           | 0.000493  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 16.9       |\n",
      "|    ep_rew_mean          | -0.461     |\n",
      "| time/                   |            |\n",
      "|    fps                  | 276        |\n",
      "|    iterations           | 25         |\n",
      "|    time_elapsed         | 185        |\n",
      "|    total_timesteps      | 51200      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 104.638245 |\n",
      "|    clip_fraction        | 0.857      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.888     |\n",
      "|    explained_variance   | 0.962      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 0.277      |\n",
      "|    n_updates            | 2200       |\n",
      "|    policy_gradient_loss | 0.209      |\n",
      "|    std                  | 0.59       |\n",
      "|    value_loss           | 0.000976   |\n",
      "----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.4     |\n",
      "|    ep_rew_mean          | -10.5    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 274      |\n",
      "|    iterations           | 26       |\n",
      "|    time_elapsed         | 193      |\n",
      "|    total_timesteps      | 53248    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 5509.244 |\n",
      "|    clip_fraction        | 0.9      |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.888   |\n",
      "|    explained_variance   | 0.937    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.233    |\n",
      "|    n_updates            | 2210     |\n",
      "|    policy_gradient_loss | 0.193    |\n",
      "|    std                  | 0.584    |\n",
      "|    value_loss           | 0.00144  |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.5      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 273       |\n",
      "|    iterations           | 27        |\n",
      "|    time_elapsed         | 201       |\n",
      "|    total_timesteps      | 55296     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 861.9478  |\n",
      "|    clip_fraction        | 0.9       |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.882    |\n",
      "|    explained_variance   | -0.000222 |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.16      |\n",
      "|    n_updates            | 2220      |\n",
      "|    policy_gradient_loss | 0.193     |\n",
      "|    std                  | 0.585     |\n",
      "|    value_loss           | 3.36e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17       |\n",
      "|    ep_rew_mean          | -0.461   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 273      |\n",
      "|    iterations           | 28       |\n",
      "|    time_elapsed         | 209      |\n",
      "|    total_timesteps      | 57344    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 813.8137 |\n",
      "|    clip_fraction        | 0.938    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.853   |\n",
      "|    explained_variance   | -4.91    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | -0.0528  |\n",
      "|    n_updates            | 2230     |\n",
      "|    policy_gradient_loss | 0.0093   |\n",
      "|    std                  | 0.549    |\n",
      "|    value_loss           | 0.0025   |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.5      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 272       |\n",
      "|    iterations           | 29        |\n",
      "|    time_elapsed         | 218       |\n",
      "|    total_timesteps      | 59392     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 4986.0576 |\n",
      "|    clip_fraction        | 0.926     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.805    |\n",
      "|    explained_variance   | -0.111    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0464    |\n",
      "|    n_updates            | 2240      |\n",
      "|    policy_gradient_loss | 0.12      |\n",
      "|    std                  | 0.53      |\n",
      "|    value_loss           | 0.00136   |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.6     |\n",
      "|    ep_rew_mean          | -0.459   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 269      |\n",
      "|    iterations           | 30       |\n",
      "|    time_elapsed         | 227      |\n",
      "|    total_timesteps      | 61440    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 14145.1  |\n",
      "|    clip_fraction        | 0.932    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.777   |\n",
      "|    explained_variance   | 0.756    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.151    |\n",
      "|    n_updates            | 2250     |\n",
      "|    policy_gradient_loss | 0.178    |\n",
      "|    std                  | 0.519    |\n",
      "|    value_loss           | 0.000547 |\n",
      "--------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 18.1     |\n",
      "|    ep_rew_mean          | -0.458   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 268      |\n",
      "|    iterations           | 31       |\n",
      "|    time_elapsed         | 236      |\n",
      "|    total_timesteps      | 63488    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 12339.82 |\n",
      "|    clip_fraction        | 0.928    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.776   |\n",
      "|    explained_variance   | 0.806    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.294    |\n",
      "|    n_updates            | 2260     |\n",
      "|    policy_gradient_loss | 0.241    |\n",
      "|    std                  | 0.533    |\n",
      "|    value_loss           | 0.000642 |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.2     |\n",
      "|    ep_rew_mean          | -0.461   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 267      |\n",
      "|    iterations           | 32       |\n",
      "|    time_elapsed         | 245      |\n",
      "|    total_timesteps      | 65536    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 6449.774 |\n",
      "|    clip_fraction        | 0.907    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.791   |\n",
      "|    explained_variance   | 0.904    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.0584   |\n",
      "|    n_updates            | 2270     |\n",
      "|    policy_gradient_loss | 0.204    |\n",
      "|    std                  | 0.532    |\n",
      "|    value_loss           | 0.00021  |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 16.4     |\n",
      "|    ep_rew_mean          | -0.462   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 265      |\n",
      "|    iterations           | 33       |\n",
      "|    time_elapsed         | 254      |\n",
      "|    total_timesteps      | 67584    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 5554.705 |\n",
      "|    clip_fraction        | 0.975    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.804   |\n",
      "|    explained_variance   | 0.91     |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.228    |\n",
      "|    n_updates            | 2280     |\n",
      "|    policy_gradient_loss | 0.261    |\n",
      "|    std                  | 0.546    |\n",
      "|    value_loss           | 0.00136  |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.3      |\n",
      "|    ep_rew_mean          | -0.458    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 264       |\n",
      "|    iterations           | 34        |\n",
      "|    time_elapsed         | 263       |\n",
      "|    total_timesteps      | 69632     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 13069.166 |\n",
      "|    clip_fraction        | 0.945     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.822    |\n",
      "|    explained_variance   | 0.924     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.276     |\n",
      "|    n_updates            | 2290      |\n",
      "|    policy_gradient_loss | 0.26      |\n",
      "|    std                  | 0.562     |\n",
      "|    value_loss           | 0.000152  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.1      |\n",
      "|    ep_rew_mean          | -0.461    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 263       |\n",
      "|    iterations           | 35        |\n",
      "|    time_elapsed         | 271       |\n",
      "|    total_timesteps      | 71680     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 486.22668 |\n",
      "|    clip_fraction        | 0.906     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.852    |\n",
      "|    explained_variance   | 0.954     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.267     |\n",
      "|    n_updates            | 2300      |\n",
      "|    policy_gradient_loss | 0.219     |\n",
      "|    std                  | 0.572     |\n",
      "|    value_loss           | 0.000119  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.4      |\n",
      "|    ep_rew_mean          | -0.462    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 263       |\n",
      "|    iterations           | 36        |\n",
      "|    time_elapsed         | 279       |\n",
      "|    total_timesteps      | 73728     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 12360.752 |\n",
      "|    clip_fraction        | 0.975     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.876    |\n",
      "|    explained_variance   | 0.933     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.291     |\n",
      "|    n_updates            | 2310      |\n",
      "|    policy_gradient_loss | 0.339     |\n",
      "|    std                  | 0.584     |\n",
      "|    value_loss           | 0.000596  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17        |\n",
      "|    ep_rew_mean          | -0.461    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 265       |\n",
      "|    iterations           | 37        |\n",
      "|    time_elapsed         | 285       |\n",
      "|    total_timesteps      | 75776     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 5743.9316 |\n",
      "|    clip_fraction        | 0.904     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.884    |\n",
      "|    explained_variance   | 0.968     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.228     |\n",
      "|    n_updates            | 2320      |\n",
      "|    policy_gradient_loss | 0.218     |\n",
      "|    std                  | 0.587     |\n",
      "|    value_loss           | 8.02e-05  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.7      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 266       |\n",
      "|    iterations           | 38        |\n",
      "|    time_elapsed         | 291       |\n",
      "|    total_timesteps      | 77824     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3956.7336 |\n",
      "|    clip_fraction        | 0.926     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.894    |\n",
      "|    explained_variance   | 0.968     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.149     |\n",
      "|    n_updates            | 2330      |\n",
      "|    policy_gradient_loss | 0.212     |\n",
      "|    std                  | 0.595     |\n",
      "|    value_loss           | 8.54e-05  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 18.2       |\n",
      "|    ep_rew_mean          | -0.458     |\n",
      "| time/                   |            |\n",
      "|    fps                  | 268        |\n",
      "|    iterations           | 39         |\n",
      "|    time_elapsed         | 297        |\n",
      "|    total_timesteps      | 79872      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 1011.46326 |\n",
      "|    clip_fraction        | 0.918      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.907     |\n",
      "|    explained_variance   | 0.973      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 0.229      |\n",
      "|    n_updates            | 2340       |\n",
      "|    policy_gradient_loss | 0.229      |\n",
      "|    std                  | 0.603      |\n",
      "|    value_loss           | 5.6e-05    |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.9      |\n",
      "|    ep_rew_mean          | -0.456    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 269       |\n",
      "|    iterations           | 40        |\n",
      "|    time_elapsed         | 303       |\n",
      "|    total_timesteps      | 81920     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 11323.352 |\n",
      "|    clip_fraction        | 0.878     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.921    |\n",
      "|    explained_variance   | 0.977     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.24      |\n",
      "|    n_updates            | 2350      |\n",
      "|    policy_gradient_loss | 0.221     |\n",
      "|    std                  | 0.618     |\n",
      "|    value_loss           | 0.000131  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.7     |\n",
      "|    ep_rew_mean          | -0.459   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 271      |\n",
      "|    iterations           | 41       |\n",
      "|    time_elapsed         | 309      |\n",
      "|    total_timesteps      | 83968    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 19.52686 |\n",
      "|    clip_fraction        | 0.849    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.945   |\n",
      "|    explained_variance   | 0.835    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.277    |\n",
      "|    n_updates            | 2360     |\n",
      "|    policy_gradient_loss | 0.313    |\n",
      "|    std                  | 0.632    |\n",
      "|    value_loss           | 0.000738 |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.9     |\n",
      "|    ep_rew_mean          | -0.459   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 272      |\n",
      "|    iterations           | 42       |\n",
      "|    time_elapsed         | 315      |\n",
      "|    total_timesteps      | 86016    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 860.7547 |\n",
      "|    clip_fraction        | 0.901    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.967   |\n",
      "|    explained_variance   | 0.945    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.254    |\n",
      "|    n_updates            | 2370     |\n",
      "|    policy_gradient_loss | 0.222    |\n",
      "|    std                  | 0.641    |\n",
      "|    value_loss           | 0.000101 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.3      |\n",
      "|    ep_rew_mean          | -0.458    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 273       |\n",
      "|    iterations           | 43        |\n",
      "|    time_elapsed         | 321       |\n",
      "|    total_timesteps      | 88064     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 153.20552 |\n",
      "|    clip_fraction        | 0.907     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.973    |\n",
      "|    explained_variance   | 0.952     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.183     |\n",
      "|    n_updates            | 2380      |\n",
      "|    policy_gradient_loss | 0.192     |\n",
      "|    std                  | 0.641     |\n",
      "|    value_loss           | 7.9e-05   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.3      |\n",
      "|    ep_rew_mean          | -0.458    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 275       |\n",
      "|    iterations           | 44        |\n",
      "|    time_elapsed         | 327       |\n",
      "|    total_timesteps      | 90112     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 18.077599 |\n",
      "|    clip_fraction        | 0.89      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.978    |\n",
      "|    explained_variance   | 0.983     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0612    |\n",
      "|    n_updates            | 2390      |\n",
      "|    policy_gradient_loss | 0.205     |\n",
      "|    std                  | 0.646     |\n",
      "|    value_loss           | 4.73e-05  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.9     |\n",
      "|    ep_rew_mean          | -0.459   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 276      |\n",
      "|    iterations           | 45       |\n",
      "|    time_elapsed         | 333      |\n",
      "|    total_timesteps      | 92160    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 6491.178 |\n",
      "|    clip_fraction        | 0.893    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.985   |\n",
      "|    explained_variance   | 0.984    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.231    |\n",
      "|    n_updates            | 2400     |\n",
      "|    policy_gradient_loss | 0.23     |\n",
      "|    std                  | 0.652    |\n",
      "|    value_loss           | 0.00014  |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.2      |\n",
      "|    ep_rew_mean          | -0.458    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 277       |\n",
      "|    iterations           | 46        |\n",
      "|    time_elapsed         | 339       |\n",
      "|    total_timesteps      | 94208     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2247.1523 |\n",
      "|    clip_fraction        | 0.901     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.979    |\n",
      "|    explained_variance   | 0.933     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.151     |\n",
      "|    n_updates            | 2410      |\n",
      "|    policy_gradient_loss | 0.142     |\n",
      "|    std                  | 0.634     |\n",
      "|    value_loss           | 0.000393  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.7      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 278       |\n",
      "|    iterations           | 47        |\n",
      "|    time_elapsed         | 345       |\n",
      "|    total_timesteps      | 96256     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1958.1338 |\n",
      "|    clip_fraction        | 0.899     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.958    |\n",
      "|    explained_variance   | 0.983     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.107     |\n",
      "|    n_updates            | 2420      |\n",
      "|    policy_gradient_loss | 0.18      |\n",
      "|    std                  | 0.626     |\n",
      "|    value_loss           | 3.58e-05  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.2     |\n",
      "|    ep_rew_mean          | -0.461   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 279      |\n",
      "|    iterations           | 48       |\n",
      "|    time_elapsed         | 351      |\n",
      "|    total_timesteps      | 98304    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 670.0194 |\n",
      "|    clip_fraction        | 0.905    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.948   |\n",
      "|    explained_variance   | 0.986    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.486    |\n",
      "|    n_updates            | 2430     |\n",
      "|    policy_gradient_loss | 0.231    |\n",
      "|    std                  | 0.624    |\n",
      "|    value_loss           | 4.07e-05 |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 15.8     |\n",
      "|    ep_rew_mean          | -0.464   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 280      |\n",
      "|    iterations           | 49       |\n",
      "|    time_elapsed         | 357      |\n",
      "|    total_timesteps      | 100352   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 92.64935 |\n",
      "|    clip_fraction        | 0.884    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.955   |\n",
      "|    explained_variance   | 0.989    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.258    |\n",
      "|    n_updates            | 2440     |\n",
      "|    policy_gradient_loss | 0.21     |\n",
      "|    std                  | 0.633    |\n",
      "|    value_loss           | 9.97e-05 |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.6     |\n",
      "|    ep_rew_mean          | -0.459   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 280      |\n",
      "|    iterations           | 50       |\n",
      "|    time_elapsed         | 365      |\n",
      "|    total_timesteps      | 102400   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 322.0231 |\n",
      "|    clip_fraction        | 0.929    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.961   |\n",
      "|    explained_variance   | 0.985    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.216    |\n",
      "|    n_updates            | 2450     |\n",
      "|    policy_gradient_loss | 0.218    |\n",
      "|    std                  | 0.631    |\n",
      "|    value_loss           | 0.000669 |\n",
      "--------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.8      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 280       |\n",
      "|    iterations           | 51        |\n",
      "|    time_elapsed         | 371       |\n",
      "|    total_timesteps      | 104448    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 12470.781 |\n",
      "|    clip_fraction        | 0.98      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.988    |\n",
      "|    explained_variance   | -0.176    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.292     |\n",
      "|    n_updates            | 2460      |\n",
      "|    policy_gradient_loss | 0.312     |\n",
      "|    std                  | 0.664     |\n",
      "|    value_loss           | 0.000744  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.3      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 280       |\n",
      "|    iterations           | 52        |\n",
      "|    time_elapsed         | 379       |\n",
      "|    total_timesteps      | 106496    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 51.192345 |\n",
      "|    clip_fraction        | 0.878     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.02     |\n",
      "|    explained_variance   | 0.952     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.265     |\n",
      "|    n_updates            | 2470      |\n",
      "|    policy_gradient_loss | 0.271     |\n",
      "|    std                  | 0.68      |\n",
      "|    value_loss           | 6.94e-05  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.7     |\n",
      "|    ep_rew_mean          | -0.459   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 281      |\n",
      "|    iterations           | 53       |\n",
      "|    time_elapsed         | 386      |\n",
      "|    total_timesteps      | 108544   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 20.69296 |\n",
      "|    clip_fraction        | 0.834    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.04    |\n",
      "|    explained_variance   | 0.963    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.182    |\n",
      "|    n_updates            | 2480     |\n",
      "|    policy_gradient_loss | 0.207    |\n",
      "|    std                  | 0.684    |\n",
      "|    value_loss           | 6.31e-05 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18        |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 281       |\n",
      "|    iterations           | 54        |\n",
      "|    time_elapsed         | 392       |\n",
      "|    total_timesteps      | 110592    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 43.895775 |\n",
      "|    clip_fraction        | 0.843     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.04     |\n",
      "|    explained_variance   | 0.98      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.13      |\n",
      "|    n_updates            | 2490      |\n",
      "|    policy_gradient_loss | 0.194     |\n",
      "|    std                  | 0.688     |\n",
      "|    value_loss           | 3.99e-05  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.2      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 282       |\n",
      "|    iterations           | 55        |\n",
      "|    time_elapsed         | 399       |\n",
      "|    total_timesteps      | 112640    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 482.07184 |\n",
      "|    clip_fraction        | 0.841     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.05     |\n",
      "|    explained_variance   | 0.984     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.246     |\n",
      "|    n_updates            | 2500      |\n",
      "|    policy_gradient_loss | 0.184     |\n",
      "|    std                  | 0.692     |\n",
      "|    value_loss           | 3.37e-05  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.9      |\n",
      "|    ep_rew_mean          | -0.461    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 282       |\n",
      "|    iterations           | 56        |\n",
      "|    time_elapsed         | 405       |\n",
      "|    total_timesteps      | 114688    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1196.1906 |\n",
      "|    clip_fraction        | 0.922     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.05     |\n",
      "|    explained_variance   | 0.981     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.217     |\n",
      "|    n_updates            | 2510      |\n",
      "|    policy_gradient_loss | 0.229     |\n",
      "|    std                  | 0.683     |\n",
      "|    value_loss           | 6.18e-05  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.1      |\n",
      "|    ep_rew_mean          | -0.461    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 283       |\n",
      "|    iterations           | 57        |\n",
      "|    time_elapsed         | 412       |\n",
      "|    total_timesteps      | 116736    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 825.18805 |\n",
      "|    clip_fraction        | 0.894     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.04     |\n",
      "|    explained_variance   | 0.988     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.248     |\n",
      "|    n_updates            | 2520      |\n",
      "|    policy_gradient_loss | 0.168     |\n",
      "|    std                  | 0.686     |\n",
      "|    value_loss           | 2.8e-05   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.6      |\n",
      "|    ep_rew_mean          | -0.462    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 284       |\n",
      "|    iterations           | 58        |\n",
      "|    time_elapsed         | 418       |\n",
      "|    total_timesteps      | 118784    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2121.4585 |\n",
      "|    clip_fraction        | 0.923     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.06     |\n",
      "|    explained_variance   | 0.987     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.195     |\n",
      "|    n_updates            | 2530      |\n",
      "|    policy_gradient_loss | 0.236     |\n",
      "|    std                  | 0.704     |\n",
      "|    value_loss           | 0.000406  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.5      |\n",
      "|    ep_rew_mean          | -0.457    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 284       |\n",
      "|    iterations           | 59        |\n",
      "|    time_elapsed         | 424       |\n",
      "|    total_timesteps      | 120832    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 20.196316 |\n",
      "|    clip_fraction        | 0.838     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.07     |\n",
      "|    explained_variance   | 0.938     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.213     |\n",
      "|    n_updates            | 2540      |\n",
      "|    policy_gradient_loss | 0.162     |\n",
      "|    std                  | 0.706     |\n",
      "|    value_loss           | 0.000222  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.7      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 285       |\n",
      "|    iterations           | 60        |\n",
      "|    time_elapsed         | 430       |\n",
      "|    total_timesteps      | 122880    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 374.10062 |\n",
      "|    clip_fraction        | 0.82      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.07     |\n",
      "|    explained_variance   | 0.971     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.211     |\n",
      "|    n_updates            | 2550      |\n",
      "|    policy_gradient_loss | 0.226     |\n",
      "|    std                  | 0.705     |\n",
      "|    value_loss           | 7.39e-05  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.5      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 286       |\n",
      "|    iterations           | 61        |\n",
      "|    time_elapsed         | 436       |\n",
      "|    total_timesteps      | 124928    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 213.60739 |\n",
      "|    clip_fraction        | 0.89      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.07     |\n",
      "|    explained_variance   | 0.982     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.182     |\n",
      "|    n_updates            | 2560      |\n",
      "|    policy_gradient_loss | 0.258     |\n",
      "|    std                  | 0.712     |\n",
      "|    value_loss           | 4.62e-05  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.7      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 286       |\n",
      "|    iterations           | 62        |\n",
      "|    time_elapsed         | 442       |\n",
      "|    total_timesteps      | 126976    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 22.863716 |\n",
      "|    clip_fraction        | 0.858     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.09     |\n",
      "|    explained_variance   | 0.983     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.203     |\n",
      "|    n_updates            | 2570      |\n",
      "|    policy_gradient_loss | 0.197     |\n",
      "|    std                  | 0.722     |\n",
      "|    value_loss           | 3.93e-05  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 19.4      |\n",
      "|    ep_rew_mean          | -0.455    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 287       |\n",
      "|    iterations           | 63        |\n",
      "|    time_elapsed         | 448       |\n",
      "|    total_timesteps      | 129024    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3.7832832 |\n",
      "|    clip_fraction        | 0.8       |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.1      |\n",
      "|    explained_variance   | 0.987     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.19      |\n",
      "|    n_updates            | 2580      |\n",
      "|    policy_gradient_loss | 0.204     |\n",
      "|    std                  | 0.736     |\n",
      "|    value_loss           | 0.00193   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.1      |\n",
      "|    ep_rew_mean          | -10.5     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 288       |\n",
      "|    iterations           | 64        |\n",
      "|    time_elapsed         | 454       |\n",
      "|    total_timesteps      | 131072    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 22.410378 |\n",
      "|    clip_fraction        | 0.839     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.12     |\n",
      "|    explained_variance   | 0.866     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0763    |\n",
      "|    n_updates            | 2590      |\n",
      "|    policy_gradient_loss | 0.171     |\n",
      "|    std                  | 0.748     |\n",
      "|    value_loss           | 0.0002    |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.8      |\n",
      "|    ep_rew_mean          | -0.462    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 288       |\n",
      "|    iterations           | 65        |\n",
      "|    time_elapsed         | 460       |\n",
      "|    total_timesteps      | 133120    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 161.91162 |\n",
      "|    clip_fraction        | 0.884     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.13     |\n",
      "|    explained_variance   | -0.000236 |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.229     |\n",
      "|    n_updates            | 2600      |\n",
      "|    policy_gradient_loss | 0.346     |\n",
      "|    std                  | 0.753     |\n",
      "|    value_loss           | 2.81e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.2      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 289       |\n",
      "|    iterations           | 66        |\n",
      "|    time_elapsed         | 466       |\n",
      "|    total_timesteps      | 135168    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1247.4353 |\n",
      "|    clip_fraction        | 0.909     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -1.1      |\n",
      "|    explained_variance   | 0.00334   |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 5.39e+03  |\n",
      "|    n_updates            | 2610      |\n",
      "|    policy_gradient_loss | 0.0373    |\n",
      "|    std                  | 0.693     |\n",
      "|    value_loss           | 2.82e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 16.7     |\n",
      "|    ep_rew_mean          | -0.462   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 290      |\n",
      "|    iterations           | 67       |\n",
      "|    time_elapsed         | 472      |\n",
      "|    total_timesteps      | 137216   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 4447.205 |\n",
      "|    clip_fraction        | 0.929    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.04    |\n",
      "|    explained_variance   | 0.0022   |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.0209   |\n",
      "|    n_updates            | 2620     |\n",
      "|    policy_gradient_loss | -0.00564 |\n",
      "|    std                  | 0.674    |\n",
      "|    value_loss           | 1.93e+03 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.8      |\n",
      "|    ep_rew_mean          | -0.461    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 290       |\n",
      "|    iterations           | 68        |\n",
      "|    time_elapsed         | 479       |\n",
      "|    total_timesteps      | 139264    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 945.63696 |\n",
      "|    clip_fraction        | 0.92      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.991    |\n",
      "|    explained_variance   | -12.9     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | -0.0645   |\n",
      "|    n_updates            | 2630      |\n",
      "|    policy_gradient_loss | -0.0172   |\n",
      "|    std                  | 0.63      |\n",
      "|    value_loss           | 0.00589   |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 16.7     |\n",
      "|    ep_rew_mean          | -0.462   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 291      |\n",
      "|    iterations           | 69       |\n",
      "|    time_elapsed         | 484      |\n",
      "|    total_timesteps      | 141312   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 4852.873 |\n",
      "|    clip_fraction        | 0.895    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.946   |\n",
      "|    explained_variance   | -0.547   |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | -0.0339  |\n",
      "|    n_updates            | 2640     |\n",
      "|    policy_gradient_loss | -0.0293  |\n",
      "|    std                  | 0.614    |\n",
      "|    value_loss           | 0.000831 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.1      |\n",
      "|    ep_rew_mean          | -0.461    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 292       |\n",
      "|    iterations           | 70        |\n",
      "|    time_elapsed         | 490       |\n",
      "|    total_timesteps      | 143360    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 170.44012 |\n",
      "|    clip_fraction        | 0.875     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.93     |\n",
      "|    explained_variance   | 0.753     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0828    |\n",
      "|    n_updates            | 2650      |\n",
      "|    policy_gradient_loss | 0.065     |\n",
      "|    std                  | 0.613     |\n",
      "|    value_loss           | 0.000434  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.8      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 291       |\n",
      "|    iterations           | 71        |\n",
      "|    time_elapsed         | 499       |\n",
      "|    total_timesteps      | 145408    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 170.51431 |\n",
      "|    clip_fraction        | 0.877     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.927    |\n",
      "|    explained_variance   | 0.83      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | -0.0529   |\n",
      "|    n_updates            | 2660      |\n",
      "|    policy_gradient_loss | 0.0706    |\n",
      "|    std                  | 0.608     |\n",
      "|    value_loss           | 0.000321  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18        |\n",
      "|    ep_rew_mean          | -0.458    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 289       |\n",
      "|    iterations           | 72        |\n",
      "|    time_elapsed         | 508       |\n",
      "|    total_timesteps      | 147456    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 310.92993 |\n",
      "|    clip_fraction        | 0.894     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.909    |\n",
      "|    explained_variance   | 0.901     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0276    |\n",
      "|    n_updates            | 2670      |\n",
      "|    policy_gradient_loss | 0.0871    |\n",
      "|    std                  | 0.596     |\n",
      "|    value_loss           | 0.000217  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 18       |\n",
      "|    ep_rew_mean          | -0.459   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 288      |\n",
      "|    iterations           | 73       |\n",
      "|    time_elapsed         | 517      |\n",
      "|    total_timesteps      | 149504   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 812.9665 |\n",
      "|    clip_fraction        | 0.889    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.893   |\n",
      "|    explained_variance   | 0.942    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.206    |\n",
      "|    n_updates            | 2680     |\n",
      "|    policy_gradient_loss | 0.144    |\n",
      "|    std                  | 0.587    |\n",
      "|    value_loss           | 0.00012  |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 16.1     |\n",
      "|    ep_rew_mean          | -0.463   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 287      |\n",
      "|    iterations           | 74       |\n",
      "|    time_elapsed         | 526      |\n",
      "|    total_timesteps      | 151552   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 651.2727 |\n",
      "|    clip_fraction        | 0.901    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.887   |\n",
      "|    explained_variance   | 0.94     |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.243    |\n",
      "|    n_updates            | 2690     |\n",
      "|    policy_gradient_loss | 0.143    |\n",
      "|    std                  | 0.587    |\n",
      "|    value_loss           | 0.000125 |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 18.3     |\n",
      "|    ep_rew_mean          | -0.458   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 286      |\n",
      "|    iterations           | 75       |\n",
      "|    time_elapsed         | 536      |\n",
      "|    total_timesteps      | 153600   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 971.5354 |\n",
      "|    clip_fraction        | 0.917    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.888   |\n",
      "|    explained_variance   | 0.942    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.143    |\n",
      "|    n_updates            | 2700     |\n",
      "|    policy_gradient_loss | 0.28     |\n",
      "|    std                  | 0.587    |\n",
      "|    value_loss           | 0.000115 |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.8     |\n",
      "|    ep_rew_mean          | -0.459   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 285      |\n",
      "|    iterations           | 76       |\n",
      "|    time_elapsed         | 545      |\n",
      "|    total_timesteps      | 155648   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 455.2525 |\n",
      "|    clip_fraction        | 0.84     |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.882   |\n",
      "|    explained_variance   | 0.953    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.106    |\n",
      "|    n_updates            | 2710     |\n",
      "|    policy_gradient_loss | 0.091    |\n",
      "|    std                  | 0.582    |\n",
      "|    value_loss           | 0.000108 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.6      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 284       |\n",
      "|    iterations           | 77        |\n",
      "|    time_elapsed         | 554       |\n",
      "|    total_timesteps      | 157696    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2429.2764 |\n",
      "|    clip_fraction        | 0.887     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.868    |\n",
      "|    explained_variance   | 0.955     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.108     |\n",
      "|    n_updates            | 2720      |\n",
      "|    policy_gradient_loss | 0.141     |\n",
      "|    std                  | 0.569     |\n",
      "|    value_loss           | 0.000113  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.8      |\n",
      "|    ep_rew_mean          | -0.461    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 282       |\n",
      "|    iterations           | 78        |\n",
      "|    time_elapsed         | 564       |\n",
      "|    total_timesteps      | 159744    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 51.547634 |\n",
      "|    clip_fraction        | 0.864     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.862    |\n",
      "|    explained_variance   | 0.973     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.125     |\n",
      "|    n_updates            | 2730      |\n",
      "|    policy_gradient_loss | 0.169     |\n",
      "|    std                  | 0.576     |\n",
      "|    value_loss           | 8.13e-05  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 18.6     |\n",
      "|    ep_rew_mean          | -0.457   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 282      |\n",
      "|    iterations           | 79       |\n",
      "|    time_elapsed         | 573      |\n",
      "|    total_timesteps      | 161792   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 34.9125  |\n",
      "|    clip_fraction        | 0.845    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.868   |\n",
      "|    explained_variance   | 0.971    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.151    |\n",
      "|    n_updates            | 2740     |\n",
      "|    policy_gradient_loss | 0.149    |\n",
      "|    std                  | 0.576    |\n",
      "|    value_loss           | 9.87e-05 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.3      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 281       |\n",
      "|    iterations           | 80        |\n",
      "|    time_elapsed         | 581       |\n",
      "|    total_timesteps      | 163840    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 131.62886 |\n",
      "|    clip_fraction        | 0.808     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.868    |\n",
      "|    explained_variance   | 0.97      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.106     |\n",
      "|    n_updates            | 2750      |\n",
      "|    policy_gradient_loss | 0.219     |\n",
      "|    std                  | 0.575     |\n",
      "|    value_loss           | 7.71e-05  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.9      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 280       |\n",
      "|    iterations           | 81        |\n",
      "|    time_elapsed         | 590       |\n",
      "|    total_timesteps      | 165888    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 26.997602 |\n",
      "|    clip_fraction        | 0.862     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.865    |\n",
      "|    explained_variance   | 0.977     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0761    |\n",
      "|    n_updates            | 2760      |\n",
      "|    policy_gradient_loss | 0.173     |\n",
      "|    std                  | 0.574     |\n",
      "|    value_loss           | 5.45e-05  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 16.8     |\n",
      "|    ep_rew_mean          | -0.462   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 280      |\n",
      "|    iterations           | 82       |\n",
      "|    time_elapsed         | 599      |\n",
      "|    total_timesteps      | 167936   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 97.25849 |\n",
      "|    clip_fraction        | 0.853    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.861   |\n",
      "|    explained_variance   | 0.972    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.411    |\n",
      "|    n_updates            | 2770     |\n",
      "|    policy_gradient_loss | 0.0915   |\n",
      "|    std                  | 0.569    |\n",
      "|    value_loss           | 6.1e-05  |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.6      |\n",
      "|    ep_rew_mean          | -0.457    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 279       |\n",
      "|    iterations           | 83        |\n",
      "|    time_elapsed         | 609       |\n",
      "|    total_timesteps      | 169984    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 183.17706 |\n",
      "|    clip_fraction        | 0.909     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.848    |\n",
      "|    explained_variance   | 0.98      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.11      |\n",
      "|    n_updates            | 2780      |\n",
      "|    policy_gradient_loss | 0.18      |\n",
      "|    std                  | 0.562     |\n",
      "|    value_loss           | 0.000216  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.4      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 278       |\n",
      "|    iterations           | 84        |\n",
      "|    time_elapsed         | 618       |\n",
      "|    total_timesteps      | 172032    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 88.187126 |\n",
      "|    clip_fraction        | 0.866     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.846    |\n",
      "|    explained_variance   | 0.985     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.204     |\n",
      "|    n_updates            | 2790      |\n",
      "|    policy_gradient_loss | 0.153     |\n",
      "|    std                  | 0.567     |\n",
      "|    value_loss           | 3.53e-05  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.8     |\n",
      "|    ep_rew_mean          | -10.5    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 277      |\n",
      "|    iterations           | 85       |\n",
      "|    time_elapsed         | 627      |\n",
      "|    total_timesteps      | 174080   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 2900.291 |\n",
      "|    clip_fraction        | 0.898    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.852   |\n",
      "|    explained_variance   | 0.982    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.186    |\n",
      "|    n_updates            | 2800     |\n",
      "|    policy_gradient_loss | 0.16     |\n",
      "|    std                  | 0.566    |\n",
      "|    value_loss           | 0.000737 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.7      |\n",
      "|    ep_rew_mean          | -0.457    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 276       |\n",
      "|    iterations           | 86        |\n",
      "|    time_elapsed         | 635       |\n",
      "|    total_timesteps      | 176128    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 93.40643  |\n",
      "|    clip_fraction        | 0.823     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.849    |\n",
      "|    explained_variance   | -0.000231 |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.24      |\n",
      "|    n_updates            | 2810      |\n",
      "|    policy_gradient_loss | 0.122     |\n",
      "|    std                  | 0.567     |\n",
      "|    value_loss           | 2.95e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 19.3     |\n",
      "|    ep_rew_mean          | -0.455   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 276      |\n",
      "|    iterations           | 87       |\n",
      "|    time_elapsed         | 644      |\n",
      "|    total_timesteps      | 178176   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 5746.815 |\n",
      "|    clip_fraction        | 0.912    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.823   |\n",
      "|    explained_variance   | -13.5    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | -0.0723  |\n",
      "|    n_updates            | 2820     |\n",
      "|    policy_gradient_loss | -0.0358  |\n",
      "|    std                  | 0.536    |\n",
      "|    value_loss           | 0.0604   |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.2      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 275       |\n",
      "|    iterations           | 88        |\n",
      "|    time_elapsed         | 653       |\n",
      "|    total_timesteps      | 180224    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 7334.0605 |\n",
      "|    clip_fraction        | 0.868     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.772    |\n",
      "|    explained_variance   | -19.7     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | -0.0246   |\n",
      "|    n_updates            | 2830      |\n",
      "|    policy_gradient_loss | -0.0452   |\n",
      "|    std                  | 0.513     |\n",
      "|    value_loss           | 0.00495   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.8      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 275       |\n",
      "|    iterations           | 89        |\n",
      "|    time_elapsed         | 662       |\n",
      "|    total_timesteps      | 182272    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 6456.1934 |\n",
      "|    clip_fraction        | 0.886     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.738    |\n",
      "|    explained_variance   | 0.533     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | -0.0405   |\n",
      "|    n_updates            | 2840      |\n",
      "|    policy_gradient_loss | 0.00497   |\n",
      "|    std                  | 0.499     |\n",
      "|    value_loss           | 0.000425  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 19       |\n",
      "|    ep_rew_mean          | -0.456   |\n",
      "| time/                   |          |\n",
      "|    fps                  | 274      |\n",
      "|    iterations           | 90       |\n",
      "|    time_elapsed         | 671      |\n",
      "|    total_timesteps      | 184320   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 4701.382 |\n",
      "|    clip_fraction        | 0.886    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.71    |\n",
      "|    explained_variance   | 0.809    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.0528   |\n",
      "|    n_updates            | 2850     |\n",
      "|    policy_gradient_loss | 0.00859  |\n",
      "|    std                  | 0.485    |\n",
      "|    value_loss           | 0.000352 |\n",
      "--------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.9      |\n",
      "|    ep_rew_mean          | -0.461    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 273       |\n",
      "|    iterations           | 91        |\n",
      "|    time_elapsed         | 680       |\n",
      "|    total_timesteps      | 186368    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 7103.8667 |\n",
      "|    clip_fraction        | 0.877     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.69     |\n",
      "|    explained_variance   | 0.906     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.00737   |\n",
      "|    n_updates            | 2860      |\n",
      "|    policy_gradient_loss | 0.0642    |\n",
      "|    std                  | 0.48      |\n",
      "|    value_loss           | 0.000211  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.2      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 274       |\n",
      "|    iterations           | 92        |\n",
      "|    time_elapsed         | 687       |\n",
      "|    total_timesteps      | 188416    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 17978.713 |\n",
      "|    clip_fraction        | 0.929     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.688    |\n",
      "|    explained_variance   | 0.914     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0996    |\n",
      "|    n_updates            | 2870      |\n",
      "|    policy_gradient_loss | 0.11      |\n",
      "|    std                  | 0.482     |\n",
      "|    value_loss           | 0.000161  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.8      |\n",
      "|    ep_rew_mean          | -0.459    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 273       |\n",
      "|    iterations           | 93        |\n",
      "|    time_elapsed         | 696       |\n",
      "|    total_timesteps      | 190464    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 15198.344 |\n",
      "|    clip_fraction        | 0.922     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.687    |\n",
      "|    explained_variance   | 0.912     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.183     |\n",
      "|    n_updates            | 2880      |\n",
      "|    policy_gradient_loss | 0.154     |\n",
      "|    std                  | 0.481     |\n",
      "|    value_loss           | 0.000172  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.5      |\n",
      "|    ep_rew_mean          | -0.457    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 272       |\n",
      "|    iterations           | 94        |\n",
      "|    time_elapsed         | 705       |\n",
      "|    total_timesteps      | 192512    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3133.0127 |\n",
      "|    clip_fraction        | 0.908     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.684    |\n",
      "|    explained_variance   | 0.954     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0887    |\n",
      "|    n_updates            | 2890      |\n",
      "|    policy_gradient_loss | 0.0704    |\n",
      "|    std                  | 0.477     |\n",
      "|    value_loss           | 9.55e-05  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.5      |\n",
      "|    ep_rew_mean          | -0.46     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 272       |\n",
      "|    iterations           | 95        |\n",
      "|    time_elapsed         | 714       |\n",
      "|    total_timesteps      | 194560    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 17857.832 |\n",
      "|    clip_fraction        | 0.896     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.677    |\n",
      "|    explained_variance   | 0.947     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.104     |\n",
      "|    n_updates            | 2900      |\n",
      "|    policy_gradient_loss | 0.0408    |\n",
      "|    std                  | 0.475     |\n",
      "|    value_loss           | 0.000119  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.2     |\n",
      "|    ep_rew_mean          | -0.46    |\n",
      "| time/                   |          |\n",
      "|    fps                  | 271      |\n",
      "|    iterations           | 96       |\n",
      "|    time_elapsed         | 723      |\n",
      "|    total_timesteps      | 196608   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 4422.433 |\n",
      "|    clip_fraction        | 0.928    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -0.668   |\n",
      "|    explained_variance   | 0.964    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.159    |\n",
      "|    n_updates            | 2910     |\n",
      "|    policy_gradient_loss | 0.0903   |\n",
      "|    std                  | 0.467    |\n",
      "|    value_loss           | 7.82e-05 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.2      |\n",
      "|    ep_rew_mean          | -0.461    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 271       |\n",
      "|    iterations           | 97        |\n",
      "|    time_elapsed         | 732       |\n",
      "|    total_timesteps      | 198656    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 16385.826 |\n",
      "|    clip_fraction        | 0.927     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.659    |\n",
      "|    explained_variance   | 0.964     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.178     |\n",
      "|    n_updates            | 2920      |\n",
      "|    policy_gradient_loss | 0.214     |\n",
      "|    std                  | 0.465     |\n",
      "|    value_loss           | 8.46e-05  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 19        |\n",
      "|    ep_rew_mean          | -0.456    |\n",
      "| time/                   |           |\n",
      "|    fps                  | 270       |\n",
      "|    iterations           | 98        |\n",
      "|    time_elapsed         | 741       |\n",
      "|    total_timesteps      | 200704    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 11069.834 |\n",
      "|    clip_fraction        | 0.916     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.651    |\n",
      "|    explained_variance   | 0.968     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.0619    |\n",
      "|    n_updates            | 2930      |\n",
      "|    policy_gradient_loss | 0.0668    |\n",
      "|    std                  | 0.461     |\n",
      "|    value_loss           | 6.69e-05  |\n",
      "---------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# Train the PPO agent for 200,000 environment steps; learn() returns the trained model.\n",
    "# NOTE(review): the log stored with this cell shows approx_kl rising from the tens into the\n",
    "# thousands with clip_fraction around 0.8-0.9, i.e. very large policy updates. Consider a\n",
    "# smaller learning rate or a `target_kl` limit where the PPO model is constructed.\n",
    "model = model.learn(total_timesteps=200_000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/steuber/anaconda3/envs/nnequiv-tf1/lib/python3.7/site-packages/stable_baselines3/common/evaluation.py:69: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.\n",
      "  UserWarning,\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:-0.46 +/- 0.02\n"
     ]
    }
   ],
   "source": [
    "# Run the current policy on `env` for 1000 episodes and report the mean and\n",
    "# standard deviation of the episode reward.\n",
    "mean_reward, std_reward = evaluate_policy(\n",
    "    model,\n",
    "    env,\n",
    "    n_eval_episodes=1000,\n",
    ")\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Repeat of the 1000-episode evaluation on `env`; prints mean and standard\n",
    "# deviation of the episode reward for the policy as it currently stands.\n",
    "mean_reward, std_reward = evaluate_policy(\n",
    "    model,\n",
    "    env,\n",
    "    n_eval_episodes=1000,\n",
    ")\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the same agent for a further 100,000 environment steps.\n",
    "model = model.learn(total_timesteps=100_000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Agent after the additional 100,000-step training run.\n",
    "# NOTE(review): this comment previously said 3,000,000 steps, while the checkpoint is\n",
    "# saved as `ppo_acc_bigger_300000_steps` -- confirm the cumulative step count.\n",
    "# `eval_env` is not defined in any visible cell, so evaluate on `env`, the same\n",
    "# environment the earlier evaluation cells use.\n",
    "mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1000)\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the trained PPO model to disk under this base name.\n",
    "model.save(path=\"ppo_acc_bigger_300000_steps\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Roll the trained policy out for 3000 environment steps, rendering after every step.\n",
    "obs = env.reset()\n",
    "for step_idx in range(3000):\n",
    "    action, _states = model.predict(obs)\n",
    "    obs, reward, done, info = env.step(action)\n",
    "    env.render()\n",
    "    if not done:\n",
    "        continue\n",
    "    # Episode ended: print the final observation when its first component is non-positive.\n",
    "    # NOTE(review): presumably obs[0] is the remaining gap, so <= 0 marks a collision -- confirm.\n",
    "    if obs[0] <= 0:\n",
    "        print(obs)\n",
    "    obs = env.reset()\n",
    "    # Overwrite the second component of the fresh observation before the policy sees it.\n",
    "    # NOTE(review): looks like v = -sqrt(2 * a * d) with a = 100 -- confirm the constant.\n",
    "    # Whether this also changes the environment's internal state depends on reset()\n",
    "    # returning a reference to it; verify against the env implementation.\n",
    "    obs[1] = -np.sqrt(obs[0] * 2 * 100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "@webio": {
   "lastCommId": null,
   "lastKernelId": null
  },
  "kernelspec": {
   "display_name": "nnequiv-tf1",
   "language": "python",
   "name": "nnequiv-tf1"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
