{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Retraining after the first attempt to prove correctness\n",
    "**Important note:**\n",
    "In the process of initiating the retraining, we found a bug in the environment:\n",
    "Essentially, the area that has now turned out to be buggy was never trained on originally, because it never occurred in the training samples due to a buggy bounds check (a mix-up between obstacle size `c` and wind speed `w` in `is_in_bounds`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gym\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import numpy as np\n",
    "import polytope as pc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from stable_baselines3 import PPO\n",
    "from stable_baselines3.common.evaluation import evaluate_policy\n",
    "import torch\n",
    "from torch import nn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import acc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<torch._C.Generator at 0x7fbead6b1470>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Seed PyTorch's global random number generator so repeated runs are comparable.\n",
    "torch.manual_seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/steuber/anaconda3/envs/nnequiv-tf1/lib/python3.7/site-packages/gym/logger.py:34: UserWarning: \u001b[33mWARN: Environment '<class 'acc.ACCEnv2'>' has deprecated methods '_step' and '_reset' rather than 'step' and 'reset'. Compatibility code invoked. Set _gym_disable_underscore_compat = True to disable this behavior.\u001b[0m\n",
      "  warnings.warn(colorize(\"%s: %s\" % (\"WARN\", msg % args), \"yellow\"))\n"
     ]
    }
   ],
   "source": [
    "# Build the environment registered under the id 'acc-variant-v1'\n",
    "# (the warning emitted by this call reports its class as acc.ACCEnv2).\n",
    "env = gym.make('acc-variant-v1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Switch off the INCLUDE_UNWINNABLE flag on the underlying (unwrapped) environment.\n",
    "# NOTE(review): presumably this keeps unwinnable start states out of sampling -- confirm in the acc module.\n",
    "env.unwrapped.INCLUDE_UNWINNABLE = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[42]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Seed the environment for reproducibility (the call returns the list of seeds used, here [42]).\n",
    "env.seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the pickled retrain polytopes; they are later wrapped in a pc.Region and\n",
    "# handed to env.init_polytopes.\n",
    "# NOTE(review): pickle.load can execute arbitrary code -- only load this file from a trusted source.\n",
    "retrain_polytopes = None\n",
    "with open(\"acc_bigger_retrain200000-100000-0.1-polytopes.pickle\",\"rb\") as f:\n",
    "    retrain_polytopes = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine the individual polytopes into a single pc.Region so that membership\n",
    "# can be tested with `x in poly_region`.\n",
    "poly_region = pc.Region(retrain_polytopes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s:  19574\n",
      "total volume:  7829.599999999999\n",
      "[-0.      -7.82518] , [0.32048 0.     ]\n",
      "poly volume:  0.4050319836489656\n",
      "max share:  5.1730865388904363e-05\n"
     ]
    }
   ],
   "source": [
    "# Total volume of polytopes?\n",
    "# Share of instances usually (i.e. without \"focus polytopes\")\n",
    "\n",
    "# Volume of state space:\n",
    "N = 100000\n",
    "n=2\n",
    "l_b = np.array([0,-200])\n",
    "u_b = np.array([100,200])\n",
    "xs = env.unwrapped.np_random.uniform(low=l_b,high=u_b,size=(N,n))\n",
    "s = 0\n",
    "for x in xs:\n",
    "    if np.sqrt(x[0]*2*env.unwrapped.A)<=x[1] and x[1]<=np.sqrt((env.unwrapped.MAX_VALUE-x[0])*2*env.unwrapped.B) and not (env.unwrapped.is_crash(x) or x[0] > env.unwrapped.MAX_VALUE):\n",
    "        s+=1\n",
    "print(\"s: \",s)\n",
    "total_vol = np.prod(u_b - l_b) * (s / N)\n",
    "print(\"total volume: \", total_vol)\n",
    "\n",
    "poly_region = pc.Region(retrain_polytopes)\n",
    "l_b, u_b = poly_region.bounding_box\n",
    "l_b = l_b.flatten()\n",
    "u_b = u_b.flatten()\n",
    "print(l_b,\",\",u_b)\n",
    "xs = env.unwrapped.np_random.uniform(low=l_b,high=u_b,size=(N,n))\n",
    "s = 0\n",
    "for x in xs:\n",
    "    if x in poly_region:\n",
    "        if -np.sqrt(x[0]*2*env.unwrapped.A)<=x[1] and x[1]<=np.sqrt((env.unwrapped.MAX_VALUE-x[0])*2*env.unwrapped.B) and not (env.unwrapped.is_crash(x) or x[0] > env.unwrapped.MAX_VALUE):\n",
    "            s+=1\n",
    "poly_vol = np.prod(u_b - l_b) * (s / N)\n",
    "print(\"poly volume: \", poly_vol)\n",
    "\n",
    "# We only have an upper bound for the share, since polytopes may be partially outside the state space of interest\n",
    "poly_share = poly_vol/total_vol\n",
    "print(\"max share: \", poly_share)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<torch._C.Generator at 0x7fbead6b1470>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Re-seed the environment and PyTorch before the evaluation cells that follow.\n",
    "env.seed(42)\n",
    "torch.manual_seed(42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): despite its name, eval_episode_length is passed to evaluate_policy\n",
    "# as n_eval_episodes, i.e. it is the number of evaluation episodes.\n",
    "eval_episode_length=1000\n",
    "# Not used in the cells visible here; presumably the retraining budget -- TODO confirm the unit (timesteps vs. episodes).\n",
    "training_episode_length=100000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/steuber/anaconda3/envs/nnequiv-tf1/lib/python3.7/site-packages/stable_baselines3/common/evaluation.py:69: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.\n",
      "  UserWarning,\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:3837.63 +/- 800.20\n"
     ]
    }
   ],
   "source": [
    "# Load the stored PPO model from model_backup/ and attach the current environment to it.\n",
    "model = PPO.load(\"model_backup/acc-2000000-64-64-64-64-100000-0.1\")\n",
    "model.set_env(env)\n",
    "\n",
    "# Evaluate the loaded model and print mean and standard deviation of the episode reward.\n",
    "# NOTE(review): init_polytopes(1.0, []) presumably selects plain sampling of start states without any focus polytopes -- confirm in acc.\n",
    "# evaluate_policy receives the raw `env` (not Monitor-wrapped), hence the UserWarning in the cell output.\n",
    "env.init_polytopes(1.0,[])\n",
    "mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:1462.65 +/- 3001.36\n"
     ]
    }
   ],
   "source": [
    "# Evaluate the same model again after handing the retrain polytopes to the environment.\n",
    "# NOTE(review): the 0.0 presumably makes every episode start inside the polytopes -- confirm in acc.\n",
    "env.init_polytopes(0.0,retrain_polytopes)\n",
    "mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Empty containers for evaluation results.\n",
    "# NOTE(review): presumably filled further down, one for the whole state space and one for the retrain polytopes -- TODO confirm.\n",
    "results_overall = {}\n",
    "results_polys = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.0\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 165      |\n",
      "|    ep_rew_mean     | 280      |\n",
      "| time/              |          |\n",
      "|    fps             | 517      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 3        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 195        |\n",
      "|    ep_rew_mean          | 696        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 395        |\n",
      "|    iterations           | 2          |\n",
      "|    time_elapsed         | 10         |\n",
      "|    total_timesteps      | 4096       |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.05854983 |\n",
      "|    clip_fraction        | 0.0719     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.702      |\n",
      "|    explained_variance   | 0.00706    |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 44.9       |\n",
      "|    n_updates            | 1520       |\n",
      "|    policy_gradient_loss | -0.00111   |\n",
      "|    std                  | 0.118      |\n",
      "|    value_loss           | 1.01e+04   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 175         |\n",
      "|    ep_rew_mean          | 415         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 365         |\n",
      "|    iterations           | 3           |\n",
      "|    time_elapsed         | 16          |\n",
      "|    total_timesteps      | 6144        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007838799 |\n",
      "|    clip_fraction        | 0.128       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.724       |\n",
      "|    explained_variance   | 0.562       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 6.59e+03    |\n",
      "|    n_updates            | 1530        |\n",
      "|    policy_gradient_loss | -0.00387    |\n",
      "|    std                  | 0.117       |\n",
      "|    value_loss           | 8.11e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 186          |\n",
      "|    ep_rew_mean          | 578          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 354          |\n",
      "|    iterations           | 4            |\n",
      "|    time_elapsed         | 23           |\n",
      "|    total_timesteps      | 8192         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0049624676 |\n",
      "|    clip_fraction        | 0.0395       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.744        |\n",
      "|    explained_variance   | 0.764        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 725          |\n",
      "|    n_updates            | 1540         |\n",
      "|    policy_gradient_loss | -0.00631     |\n",
      "|    std                  | 0.114        |\n",
      "|    value_loss           | 9.33e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 210         |\n",
      "|    ep_rew_mean          | 913         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 347         |\n",
      "|    iterations           | 5           |\n",
      "|    time_elapsed         | 29          |\n",
      "|    total_timesteps      | 10240       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008669404 |\n",
      "|    clip_fraction        | 0.0502      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.772       |\n",
      "|    explained_variance   | 0.381       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.43        |\n",
      "|    n_updates            | 1550        |\n",
      "|    policy_gradient_loss | -0.00465    |\n",
      "|    std                  | 0.11        |\n",
      "|    value_loss           | 1.07e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 209         |\n",
      "|    ep_rew_mean          | 906         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 341         |\n",
      "|    iterations           | 6           |\n",
      "|    time_elapsed         | 35          |\n",
      "|    total_timesteps      | 12288       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.024775116 |\n",
      "|    clip_fraction        | 0.0415      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.801       |\n",
      "|    explained_variance   | -1.7e+03    |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.18        |\n",
      "|    n_updates            | 1560        |\n",
      "|    policy_gradient_loss | -0.0017     |\n",
      "|    std                  | 0.107       |\n",
      "|    value_loss           | 2e+03       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 225          |\n",
      "|    ep_rew_mean          | 1.13e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 337          |\n",
      "|    iterations           | 7            |\n",
      "|    time_elapsed         | 42           |\n",
      "|    total_timesteps      | 14336        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0033195594 |\n",
      "|    clip_fraction        | 0.064        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.827        |\n",
      "|    explained_variance   | 0.0296       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.06e+04     |\n",
      "|    n_updates            | 1570         |\n",
      "|    policy_gradient_loss | -0.00493     |\n",
      "|    std                  | 0.105        |\n",
      "|    value_loss           | 1.55e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 239         |\n",
      "|    ep_rew_mean          | 1.33e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 333         |\n",
      "|    iterations           | 8           |\n",
      "|    time_elapsed         | 49          |\n",
      "|    total_timesteps      | 16384       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006954175 |\n",
      "|    clip_fraction        | 0.076       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.847       |\n",
      "|    explained_variance   | -148        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 5.96        |\n",
      "|    n_updates            | 1580        |\n",
      "|    policy_gradient_loss | 0.0051      |\n",
      "|    std                  | 0.103       |\n",
      "|    value_loss           | 328         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 241         |\n",
      "|    ep_rew_mean          | 1.36e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 332         |\n",
      "|    iterations           | 9           |\n",
      "|    time_elapsed         | 55          |\n",
      "|    total_timesteps      | 18432       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011491849 |\n",
      "|    clip_fraction        | 0.104       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.872       |\n",
      "|    explained_variance   | -66.3       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.04        |\n",
      "|    n_updates            | 1590        |\n",
      "|    policy_gradient_loss | -3.7e-05    |\n",
      "|    std                  | 0.0998      |\n",
      "|    value_loss           | 127         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 245          |\n",
      "|    ep_rew_mean          | 1.42e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 330          |\n",
      "|    iterations           | 10           |\n",
      "|    time_elapsed         | 61           |\n",
      "|    total_timesteps      | 20480        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0058178566 |\n",
      "|    clip_fraction        | 0.0411       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.9          |\n",
      "|    explained_variance   | 0.00304      |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.92e+04     |\n",
      "|    n_updates            | 1600         |\n",
      "|    policy_gradient_loss | -0.00531     |\n",
      "|    std                  | 0.0969       |\n",
      "|    value_loss           | 1.06e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 255          |\n",
      "|    ep_rew_mean          | 1.56e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 329          |\n",
      "|    iterations           | 11           |\n",
      "|    time_elapsed         | 68           |\n",
      "|    total_timesteps      | 22528        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0038870352 |\n",
      "|    clip_fraction        | 0.11         |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.927        |\n",
      "|    explained_variance   | 0.165        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 25.5         |\n",
      "|    n_updates            | 1610         |\n",
      "|    policy_gradient_loss | 0.00237      |\n",
      "|    std                  | 0.0945       |\n",
      "|    value_loss           | 6.94e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 263          |\n",
      "|    ep_rew_mean          | 1.68e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 328          |\n",
      "|    iterations           | 12           |\n",
      "|    time_elapsed         | 74           |\n",
      "|    total_timesteps      | 24576        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012527882 |\n",
      "|    clip_fraction        | 0.0396       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.954        |\n",
      "|    explained_variance   | -26.8        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.84         |\n",
      "|    n_updates            | 1620         |\n",
      "|    policy_gradient_loss | -0.00291     |\n",
      "|    std                  | 0.0921       |\n",
      "|    value_loss           | 80.8         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 265          |\n",
      "|    ep_rew_mean          | 1.71e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 327          |\n",
      "|    iterations           | 13           |\n",
      "|    time_elapsed         | 81           |\n",
      "|    total_timesteps      | 26624        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0044849645 |\n",
      "|    clip_fraction        | 0.0512       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.983        |\n",
      "|    explained_variance   | -2.27        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7.2          |\n",
      "|    n_updates            | 1630         |\n",
      "|    policy_gradient_loss | -0.00272     |\n",
      "|    std                  | 0.089        |\n",
      "|    value_loss           | 22.2         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 271         |\n",
      "|    ep_rew_mean          | 1.79e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 327         |\n",
      "|    iterations           | 14          |\n",
      "|    time_elapsed         | 87          |\n",
      "|    total_timesteps      | 28672       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009082735 |\n",
      "|    clip_fraction        | 0.0598      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.01        |\n",
      "|    explained_variance   | 0.0165      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 944         |\n",
      "|    n_updates            | 1640        |\n",
      "|    policy_gradient_loss | -0.00447    |\n",
      "|    std                  | 0.0873      |\n",
      "|    value_loss           | 7.32e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 287          |\n",
      "|    ep_rew_mean          | 2.03e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 325          |\n",
      "|    iterations           | 15           |\n",
      "|    time_elapsed         | 94           |\n",
      "|    total_timesteps      | 30720        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0076065985 |\n",
      "|    clip_fraction        | 0.105        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.04         |\n",
      "|    explained_variance   | 0.0497       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 205          |\n",
      "|    n_updates            | 1650         |\n",
      "|    policy_gradient_loss | -0.00144     |\n",
      "|    std                  | 0.0846       |\n",
      "|    value_loss           | 3.98e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 287          |\n",
      "|    ep_rew_mean          | 2.03e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 325          |\n",
      "|    iterations           | 16           |\n",
      "|    time_elapsed         | 100          |\n",
      "|    total_timesteps      | 32768        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0110697225 |\n",
      "|    clip_fraction        | 0.144        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.07         |\n",
      "|    explained_variance   | -20.2        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.36         |\n",
      "|    n_updates            | 1660         |\n",
      "|    policy_gradient_loss | -0.00106     |\n",
      "|    std                  | 0.0824       |\n",
      "|    value_loss           | 23.1         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 304          |\n",
      "|    ep_rew_mean          | 2.27e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 324          |\n",
      "|    iterations           | 17           |\n",
      "|    time_elapsed         | 107          |\n",
      "|    total_timesteps      | 34816        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0051908484 |\n",
      "|    clip_fraction        | 0.0716       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.09         |\n",
      "|    explained_variance   | 0.00224      |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.97         |\n",
      "|    n_updates            | 1670         |\n",
      "|    policy_gradient_loss | -0.00241     |\n",
      "|    std                  | 0.0799       |\n",
      "|    value_loss           | 1.05e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 295         |\n",
      "|    ep_rew_mean          | 2.16e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 324         |\n",
      "|    iterations           | 18          |\n",
      "|    time_elapsed         | 113         |\n",
      "|    total_timesteps      | 36864       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005266125 |\n",
      "|    clip_fraction        | 0.0188      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.12        |\n",
      "|    explained_variance   | -140        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.88        |\n",
      "|    n_updates            | 1680        |\n",
      "|    policy_gradient_loss | -0.00572    |\n",
      "|    std                  | 0.078       |\n",
      "|    value_loss           | 124         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 300          |\n",
      "|    ep_rew_mean          | 2.23e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 323          |\n",
      "|    iterations           | 19           |\n",
      "|    time_elapsed         | 120          |\n",
      "|    total_timesteps      | 38912        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0031574552 |\n",
      "|    clip_fraction        | 0.0319       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.14         |\n",
      "|    explained_variance   | 0.0256       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 9.64         |\n",
      "|    n_updates            | 1690         |\n",
      "|    policy_gradient_loss | -0.00761     |\n",
      "|    std                  | 0.0772       |\n",
      "|    value_loss           | 2.04e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 300        |\n",
      "|    ep_rew_mean          | 2.23e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 323        |\n",
      "|    iterations           | 20         |\n",
      "|    time_elapsed         | 126        |\n",
      "|    total_timesteps      | 40960      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.23256797 |\n",
      "|    clip_fraction        | 0.167      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.15       |\n",
      "|    explained_variance   | -0.477     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 8.38       |\n",
      "|    n_updates            | 1700       |\n",
      "|    policy_gradient_loss | 0.00328    |\n",
      "|    std                  | 0.0759     |\n",
      "|    value_loss           | 8.01e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 320         |\n",
      "|    ep_rew_mean          | 2.52e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 323         |\n",
      "|    iterations           | 21          |\n",
      "|    time_elapsed         | 132         |\n",
      "|    total_timesteps      | 43008       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003735847 |\n",
      "|    clip_fraction        | 0.127       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.18        |\n",
      "|    explained_variance   | -146        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 5.21        |\n",
      "|    n_updates            | 1710        |\n",
      "|    policy_gradient_loss | -0.00497    |\n",
      "|    std                  | 0.0733      |\n",
      "|    value_loss           | 130         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 316         |\n",
      "|    ep_rew_mean          | 2.47e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 322         |\n",
      "|    iterations           | 22          |\n",
      "|    time_elapsed         | 139         |\n",
      "|    total_timesteps      | 45056       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008708488 |\n",
      "|    clip_fraction        | 0.115       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.22        |\n",
      "|    explained_variance   | -3.25       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.28        |\n",
      "|    n_updates            | 1720        |\n",
      "|    policy_gradient_loss | -0.0101     |\n",
      "|    std                  | 0.0698      |\n",
      "|    value_loss           | 7.86        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 312         |\n",
      "|    ep_rew_mean          | 2.42e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 322         |\n",
      "|    iterations           | 23          |\n",
      "|    time_elapsed         | 146         |\n",
      "|    total_timesteps      | 47104       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008200831 |\n",
      "|    clip_fraction        | 0.0603      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.27        |\n",
      "|    explained_variance   | 0.00477     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 14.3        |\n",
      "|    n_updates            | 1730        |\n",
      "|    policy_gradient_loss | -0.0087     |\n",
      "|    std                  | 0.0669      |\n",
      "|    value_loss           | 4e+03       |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 312         |\n",
      "|    ep_rew_mean          | 2.42e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 322         |\n",
      "|    iterations           | 24          |\n",
      "|    time_elapsed         | 152         |\n",
      "|    total_timesteps      | 49152       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008100558 |\n",
      "|    clip_fraction        | 0.0692      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.31        |\n",
      "|    explained_variance   | 0.0556      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.01        |\n",
      "|    n_updates            | 1740        |\n",
      "|    policy_gradient_loss | -0.00684    |\n",
      "|    std                  | 0.0634      |\n",
      "|    value_loss           | 3.98e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 320         |\n",
      "|    ep_rew_mean          | 2.55e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 322         |\n",
      "|    iterations           | 25          |\n",
      "|    time_elapsed         | 158         |\n",
      "|    total_timesteps      | 51200       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.017921273 |\n",
      "|    clip_fraction        | 0.123       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.38        |\n",
      "|    explained_variance   | -7.1        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.26        |\n",
      "|    n_updates            | 1750        |\n",
      "|    policy_gradient_loss | -0.00566    |\n",
      "|    std                  | 0.0595      |\n",
      "|    value_loss           | 6.98        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 308         |\n",
      "|    ep_rew_mean          | 2.38e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 320         |\n",
      "|    iterations           | 26          |\n",
      "|    time_elapsed         | 166         |\n",
      "|    total_timesteps      | 53248       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005095102 |\n",
      "|    clip_fraction        | 0.0691      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.42        |\n",
      "|    explained_variance   | -0.0115     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 7.08        |\n",
      "|    n_updates            | 1760        |\n",
      "|    policy_gradient_loss | -0.00187    |\n",
      "|    std                  | 0.0578      |\n",
      "|    value_loss           | 7.84e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 283          |\n",
      "|    ep_rew_mean          | 2.04e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 320          |\n",
      "|    iterations           | 27           |\n",
      "|    time_elapsed         | 172          |\n",
      "|    total_timesteps      | 55296        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0103376405 |\n",
      "|    clip_fraction        | 0.0684       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.44         |\n",
      "|    explained_variance   | 0.133        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.72         |\n",
      "|    n_updates            | 1770         |\n",
      "|    policy_gradient_loss | -0.011       |\n",
      "|    std                  | 0.0565       |\n",
      "|    value_loss           | 1.29e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 283          |\n",
      "|    ep_rew_mean          | 2.04e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 320          |\n",
      "|    iterations           | 28           |\n",
      "|    time_elapsed         | 179          |\n",
      "|    total_timesteps      | 57344        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0074174325 |\n",
      "|    clip_fraction        | 0.0712       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.46         |\n",
      "|    explained_variance   | 0.272        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.92         |\n",
      "|    n_updates            | 1780         |\n",
      "|    policy_gradient_loss | -0.00545     |\n",
      "|    std                  | 0.0553       |\n",
      "|    value_loss           | 1.5e+04      |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 283         |\n",
      "|    ep_rew_mean          | 2.05e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 319         |\n",
      "|    iterations           | 29          |\n",
      "|    time_elapsed         | 185         |\n",
      "|    total_timesteps      | 59392       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011035931 |\n",
      "|    clip_fraction        | 0.124       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.49        |\n",
      "|    explained_variance   | -2.23       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.06        |\n",
      "|    n_updates            | 1790        |\n",
      "|    policy_gradient_loss | 0.0007      |\n",
      "|    std                  | 0.0535      |\n",
      "|    value_loss           | 5.03e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 275          |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 319          |\n",
      "|    iterations           | 30           |\n",
      "|    time_elapsed         | 192          |\n",
      "|    total_timesteps      | 61440        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0023714597 |\n",
      "|    clip_fraction        | 0.0847       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.53         |\n",
      "|    explained_variance   | 0.148        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.24         |\n",
      "|    n_updates            | 1800         |\n",
      "|    policy_gradient_loss | 0.00259      |\n",
      "|    std                  | 0.0517       |\n",
      "|    value_loss           | 3.85e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 275        |\n",
      "|    ep_rew_mean          | 1.94e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 319        |\n",
      "|    iterations           | 31         |\n",
      "|    time_elapsed         | 198        |\n",
      "|    total_timesteps      | 63488      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.10569733 |\n",
      "|    clip_fraction        | 0.0918     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.56       |\n",
      "|    explained_variance   | 0.0193     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 4.06       |\n",
      "|    n_updates            | 1810       |\n",
      "|    policy_gradient_loss | 0.00259    |\n",
      "|    std                  | 0.0495     |\n",
      "|    value_loss           | 1.58e+04   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 287         |\n",
      "|    ep_rew_mean          | 2.12e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 319         |\n",
      "|    iterations           | 32          |\n",
      "|    time_elapsed         | 205         |\n",
      "|    total_timesteps      | 65536       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008747693 |\n",
      "|    clip_fraction        | 0.11        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.59        |\n",
      "|    explained_variance   | 0.133       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 5           |\n",
      "|    n_updates            | 1820        |\n",
      "|    policy_gradient_loss | -0.00899    |\n",
      "|    std                  | 0.0491      |\n",
      "|    value_loss           | 7.48e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 304         |\n",
      "|    ep_rew_mean          | 2.36e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 319         |\n",
      "|    iterations           | 33          |\n",
      "|    time_elapsed         | 211         |\n",
      "|    total_timesteps      | 67584       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011028957 |\n",
      "|    clip_fraction        | 0.15        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.6         |\n",
      "|    explained_variance   | -18.1       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.698       |\n",
      "|    n_updates            | 1830        |\n",
      "|    policy_gradient_loss | -0.0156     |\n",
      "|    std                  | 0.0485      |\n",
      "|    value_loss           | 13.1        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 308          |\n",
      "|    ep_rew_mean          | 2.42e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 319          |\n",
      "|    iterations           | 34           |\n",
      "|    time_elapsed         | 218          |\n",
      "|    total_timesteps      | 69632        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0141742285 |\n",
      "|    clip_fraction        | 0.0715       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.61         |\n",
      "|    explained_variance   | -0.00635     |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.94         |\n",
      "|    n_updates            | 1840         |\n",
      "|    policy_gradient_loss | -0.00636     |\n",
      "|    std                  | 0.0478       |\n",
      "|    value_loss           | 4.23e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 308        |\n",
      "|    ep_rew_mean          | 2.43e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 319        |\n",
      "|    iterations           | 35         |\n",
      "|    time_elapsed         | 224        |\n",
      "|    total_timesteps      | 71680      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00661062 |\n",
      "|    clip_fraction        | 0.177      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.63       |\n",
      "|    explained_variance   | -0.996     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 2.47       |\n",
      "|    n_updates            | 1850       |\n",
      "|    policy_gradient_loss | -0.0003    |\n",
      "|    std                  | 0.0468     |\n",
      "|    value_loss           | 4.83       |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 304         |\n",
      "|    ep_rew_mean          | 2.37e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 318         |\n",
      "|    iterations           | 36          |\n",
      "|    time_elapsed         | 231         |\n",
      "|    total_timesteps      | 73728       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006275288 |\n",
      "|    clip_fraction        | 0.042       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.65        |\n",
      "|    explained_variance   | 0.205       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.79        |\n",
      "|    n_updates            | 1860        |\n",
      "|    policy_gradient_loss | -0.0026     |\n",
      "|    std                  | 0.0461      |\n",
      "|    value_loss           | 5.86        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 304         |\n",
      "|    ep_rew_mean          | 2.38e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 318         |\n",
      "|    iterations           | 37          |\n",
      "|    time_elapsed         | 237         |\n",
      "|    total_timesteps      | 75776       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004461634 |\n",
      "|    clip_fraction        | 0.0608      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.67        |\n",
      "|    explained_variance   | 0.00166     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.916       |\n",
      "|    n_updates            | 1870        |\n",
      "|    policy_gradient_loss | -0.00582    |\n",
      "|    std                  | 0.0452      |\n",
      "|    value_loss           | 4.18e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 308          |\n",
      "|    ep_rew_mean          | 2.44e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 318          |\n",
      "|    iterations           | 38           |\n",
      "|    time_elapsed         | 244          |\n",
      "|    total_timesteps      | 77824        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0059758886 |\n",
      "|    clip_fraction        | 0.0461       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.68         |\n",
      "|    explained_variance   | 0.0346       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.72         |\n",
      "|    n_updates            | 1880         |\n",
      "|    policy_gradient_loss | 6.8e-05      |\n",
      "|    std                  | 0.0445       |\n",
      "|    value_loss           | 4.17e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 308         |\n",
      "|    ep_rew_mean          | 2.44e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 318         |\n",
      "|    iterations           | 39          |\n",
      "|    time_elapsed         | 250         |\n",
      "|    total_timesteps      | 79872       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004194413 |\n",
      "|    clip_fraction        | 0.0555      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.7         |\n",
      "|    explained_variance   | -0.0782     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 10.1        |\n",
      "|    n_updates            | 1890        |\n",
      "|    policy_gradient_loss | -0.00483    |\n",
      "|    std                  | 0.0443      |\n",
      "|    value_loss           | 23          |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 312         |\n",
      "|    ep_rew_mean          | 2.51e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 318         |\n",
      "|    iterations           | 40          |\n",
      "|    time_elapsed         | 257         |\n",
      "|    total_timesteps      | 81920       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006259281 |\n",
      "|    clip_fraction        | 0.0555      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.71        |\n",
      "|    explained_variance   | -1.13       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.7         |\n",
      "|    n_updates            | 1900        |\n",
      "|    policy_gradient_loss | 0.000656    |\n",
      "|    std                  | 0.0434      |\n",
      "|    value_loss           | 6.3         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 316         |\n",
      "|    ep_rew_mean          | 2.57e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 318         |\n",
      "|    iterations           | 41          |\n",
      "|    time_elapsed         | 263         |\n",
      "|    total_timesteps      | 83968       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009015327 |\n",
      "|    clip_fraction        | 0.0333      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.73        |\n",
      "|    explained_variance   | -0.306      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.19        |\n",
      "|    n_updates            | 1910        |\n",
      "|    policy_gradient_loss | -0.00554    |\n",
      "|    std                  | 0.0423      |\n",
      "|    value_loss           | 7.54        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 328         |\n",
      "|    ep_rew_mean          | 2.75e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 318         |\n",
      "|    iterations           | 42          |\n",
      "|    time_elapsed         | 270         |\n",
      "|    total_timesteps      | 86016       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004189063 |\n",
      "|    clip_fraction        | 0.0625      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.75        |\n",
      "|    explained_variance   | 0.00744     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.72        |\n",
      "|    n_updates            | 1920        |\n",
      "|    policy_gradient_loss | -0.00209    |\n",
      "|    std                  | 0.0417      |\n",
      "|    value_loss           | 4.06e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 336         |\n",
      "|    ep_rew_mean          | 2.87e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 318         |\n",
      "|    iterations           | 43          |\n",
      "|    time_elapsed         | 276         |\n",
      "|    total_timesteps      | 88064       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008911992 |\n",
      "|    clip_fraction        | 0.132       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.76        |\n",
      "|    explained_variance   | -3.17       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 6.61        |\n",
      "|    n_updates            | 1930        |\n",
      "|    policy_gradient_loss | -0.00915    |\n",
      "|    std                  | 0.0415      |\n",
      "|    value_loss           | 19.1        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 353         |\n",
      "|    ep_rew_mean          | 3.11e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 318         |\n",
      "|    iterations           | 44          |\n",
      "|    time_elapsed         | 283         |\n",
      "|    total_timesteps      | 90112       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004710101 |\n",
      "|    clip_fraction        | 0.106       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.77        |\n",
      "|    explained_variance   | 0.0915      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 11          |\n",
      "|    n_updates            | 1940        |\n",
      "|    policy_gradient_loss | 0.00111     |\n",
      "|    std                  | 0.0411      |\n",
      "|    value_loss           | 27.7        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 361         |\n",
      "|    ep_rew_mean          | 3.23e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 317         |\n",
      "|    iterations           | 45          |\n",
      "|    time_elapsed         | 289         |\n",
      "|    total_timesteps      | 92160       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006253126 |\n",
      "|    clip_fraction        | 0.0716      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.78        |\n",
      "|    explained_variance   | 0.64        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.07        |\n",
      "|    n_updates            | 1950        |\n",
      "|    policy_gradient_loss | -0.000234   |\n",
      "|    std                  | 0.0405      |\n",
      "|    value_loss           | 17.2        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 365         |\n",
      "|    ep_rew_mean          | 3.29e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 317         |\n",
      "|    iterations           | 46          |\n",
      "|    time_elapsed         | 296         |\n",
      "|    total_timesteps      | 94208       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005300261 |\n",
      "|    clip_fraction        | 0.1         |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.8         |\n",
      "|    explained_variance   | 0.387       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.07        |\n",
      "|    n_updates            | 1960        |\n",
      "|    policy_gradient_loss | 0.00588     |\n",
      "|    std                  | 0.0396      |\n",
      "|    value_loss           | 8.93        |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 369         |\n",
      "|    ep_rew_mean          | 3.35e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 317         |\n",
      "|    iterations           | 47          |\n",
      "|    time_elapsed         | 302         |\n",
      "|    total_timesteps      | 96256       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011187069 |\n",
      "|    clip_fraction        | 0.0621      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.82        |\n",
      "|    explained_variance   | 0.515       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.76        |\n",
      "|    n_updates            | 1970        |\n",
      "|    policy_gradient_loss | -0.00541    |\n",
      "|    std                  | 0.0387      |\n",
      "|    value_loss           | 10          |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 377          |\n",
      "|    ep_rew_mean          | 3.47e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 317          |\n",
      "|    iterations           | 48           |\n",
      "|    time_elapsed         | 309          |\n",
      "|    total_timesteps      | 98304        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0040781023 |\n",
      "|    clip_fraction        | 0.063        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.86         |\n",
      "|    explained_variance   | -0.0927      |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 13.6         |\n",
      "|    n_updates            | 1980         |\n",
      "|    policy_gradient_loss | -0.00712     |\n",
      "|    std                  | 0.0369       |\n",
      "|    value_loss           | 10.9         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 385         |\n",
      "|    ep_rew_mean          | 3.59e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 317         |\n",
      "|    iterations           | 49          |\n",
      "|    time_elapsed         | 315         |\n",
      "|    total_timesteps      | 100352      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005205975 |\n",
      "|    clip_fraction        | 0.0389      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.88        |\n",
      "|    explained_variance   | 0.853       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.36        |\n",
      "|    n_updates            | 1990        |\n",
      "|    policy_gradient_loss | -0.00526    |\n",
      "|    std                  | 0.0368      |\n",
      "|    value_loss           | 14.3        |\n",
      "-----------------------------------------\n",
      "--- 318.7211503982544 seconds ---\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.1\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 410      |\n",
      "|    ep_rew_mean     | 3.69e+03 |\n",
      "| time/              |          |\n",
      "|    fps             | 506      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 4        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 284          |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 390          |\n",
      "|    iterations           | 2            |\n",
      "|    time_elapsed         | 10           |\n",
      "|    total_timesteps      | 4096         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0024217137 |\n",
      "|    clip_fraction        | 0.0133       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.701        |\n",
      "|    explained_variance   | -0.0734      |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.41         |\n",
      "|    n_updates            | 1520         |\n",
      "|    policy_gradient_loss | -0.00572     |\n",
      "|    std                  | 0.119        |\n",
      "|    value_loss           | 18.7         |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 230        |\n",
      "|    ep_rew_mean          | 1.18e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 357        |\n",
      "|    iterations           | 3          |\n",
      "|    time_elapsed         | 17         |\n",
      "|    total_timesteps      | 6144       |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01287573 |\n",
      "|    clip_fraction        | 0.0771     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.725      |\n",
      "|    explained_variance   | 0.00166    |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 40.8       |\n",
      "|    n_updates            | 1530       |\n",
      "|    policy_gradient_loss | -0.00299   |\n",
      "|    std                  | 0.115      |\n",
      "|    value_loss           | 1.28e+04   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 244          |\n",
      "|    ep_rew_mean          | 1.38e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 345          |\n",
      "|    iterations           | 4            |\n",
      "|    time_elapsed         | 23           |\n",
      "|    total_timesteps      | 8192         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0043302085 |\n",
      "|    clip_fraction        | 0.0838       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.756        |\n",
      "|    explained_variance   | 0.244        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 14           |\n",
      "|    n_updates            | 1540         |\n",
      "|    policy_gradient_loss | -0.000431    |\n",
      "|    std                  | 0.112        |\n",
      "|    value_loss           | 1.31e+04     |\n",
      "------------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 253       |\n",
      "|    ep_rew_mean          | 1.5e+03   |\n",
      "| time/                   |           |\n",
      "|    fps                  | 337       |\n",
      "|    iterations           | 5         |\n",
      "|    time_elapsed         | 30        |\n",
      "|    total_timesteps      | 10240     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.3177299 |\n",
      "|    clip_fraction        | 0.0805    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.78      |\n",
      "|    explained_variance   | 0.127     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 7.14      |\n",
      "|    n_updates            | 1550      |\n",
      "|    policy_gradient_loss | 0.0152    |\n",
      "|    std                  | 0.109     |\n",
      "|    value_loss           | 6.13e+03  |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 259         |\n",
      "|    ep_rew_mean          | 1.59e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 332         |\n",
      "|    iterations           | 6           |\n",
      "|    time_elapsed         | 36          |\n",
      "|    total_timesteps      | 12288       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004789211 |\n",
      "|    clip_fraction        | 0.0633      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.808       |\n",
      "|    explained_variance   | 0.409       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 5.27e+04    |\n",
      "|    n_updates            | 1560        |\n",
      "|    policy_gradient_loss | 0.000551    |\n",
      "|    std                  | 0.107       |\n",
      "|    value_loss           | 5.62e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 254          |\n",
      "|    ep_rew_mean          | 1.53e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 330          |\n",
      "|    iterations           | 7            |\n",
      "|    time_elapsed         | 43           |\n",
      "|    total_timesteps      | 14336        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0055294046 |\n",
      "|    clip_fraction        | 0.0569       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.834        |\n",
      "|    explained_variance   | 0.191        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.7          |\n",
      "|    n_updates            | 1570         |\n",
      "|    policy_gradient_loss | -0.00716     |\n",
      "|    std                  | 0.103        |\n",
      "|    value_loss           | 6.96e+03     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 258         |\n",
      "|    ep_rew_mean          | 1.59e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 328         |\n",
      "|    iterations           | 8           |\n",
      "|    time_elapsed         | 49          |\n",
      "|    total_timesteps      | 16384       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.016542507 |\n",
      "|    clip_fraction        | 0.136       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.865       |\n",
      "|    explained_variance   | 0.0532      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 218         |\n",
      "|    n_updates            | 1580        |\n",
      "|    policy_gradient_loss | -0.00816    |\n",
      "|    std                  | 0.101       |\n",
      "|    value_loss           | 1.45e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 258         |\n",
      "|    ep_rew_mean          | 1.6e+03     |\n",
      "| time/                   |             |\n",
      "|    fps                  | 327         |\n",
      "|    iterations           | 9           |\n",
      "|    time_elapsed         | 56          |\n",
      "|    total_timesteps      | 18432       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011459956 |\n",
      "|    clip_fraction        | 0.0945      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.892       |\n",
      "|    explained_variance   | 0.131       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.13        |\n",
      "|    n_updates            | 1590        |\n",
      "|    policy_gradient_loss | -0.00936    |\n",
      "|    std                  | 0.0978      |\n",
      "|    value_loss           | 7.12e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 258        |\n",
      "|    ep_rew_mean          | 1.6e+03    |\n",
      "| time/                   |            |\n",
      "|    fps                  | 326        |\n",
      "|    iterations           | 10         |\n",
      "|    time_elapsed         | 62         |\n",
      "|    total_timesteps      | 20480      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01254664 |\n",
      "|    clip_fraction        | 0.0754     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.927      |\n",
      "|    explained_variance   | 0.162      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 19.8       |\n",
      "|    n_updates            | 1600       |\n",
      "|    policy_gradient_loss | -0.00226   |\n",
      "|    std                  | 0.0937     |\n",
      "|    value_loss           | 1.01e+04   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 261         |\n",
      "|    ep_rew_mean          | 1.64e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 325         |\n",
      "|    iterations           | 11          |\n",
      "|    time_elapsed         | 69          |\n",
      "|    total_timesteps      | 22528       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.027353216 |\n",
      "|    clip_fraction        | 0.0939      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.967       |\n",
      "|    explained_variance   | 0.169       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.25        |\n",
      "|    n_updates            | 1610        |\n",
      "|    policy_gradient_loss | 0.000856    |\n",
      "|    std                  | 0.0903      |\n",
      "|    value_loss           | 1e+04       |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 269          |\n",
      "|    ep_rew_mean          | 1.76e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 322          |\n",
      "|    iterations           | 12           |\n",
      "|    time_elapsed         | 76           |\n",
      "|    total_timesteps      | 24576        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0046020537 |\n",
      "|    clip_fraction        | 0.0471       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1            |\n",
      "|    explained_variance   | 0.224        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 13.3         |\n",
      "|    n_updates            | 1620         |\n",
      "|    policy_gradient_loss | -0.00709     |\n",
      "|    std                  | 0.0877       |\n",
      "|    value_loss           | 6.47e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 277          |\n",
      "|    ep_rew_mean          | 1.87e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 322          |\n",
      "|    iterations           | 13           |\n",
      "|    time_elapsed         | 82           |\n",
      "|    total_timesteps      | 26624        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0064450414 |\n",
      "|    clip_fraction        | 0.151        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.03         |\n",
      "|    explained_variance   | -32.4        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 11.7         |\n",
      "|    n_updates            | 1630         |\n",
      "|    policy_gradient_loss | -0.00207     |\n",
      "|    std                  | 0.0856       |\n",
      "|    value_loss           | 117          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 283         |\n",
      "|    ep_rew_mean          | 1.97e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 321         |\n",
      "|    iterations           | 14          |\n",
      "|    time_elapsed         | 89          |\n",
      "|    total_timesteps      | 28672       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.025002774 |\n",
      "|    clip_fraction        | 0.108       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.06        |\n",
      "|    explained_variance   | -6.54       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 72.5        |\n",
      "|    n_updates            | 1640        |\n",
      "|    policy_gradient_loss | -0.00267    |\n",
      "|    std                  | 0.083       |\n",
      "|    value_loss           | 32.1        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 279          |\n",
      "|    ep_rew_mean          | 1.92e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 321          |\n",
      "|    iterations           | 15           |\n",
      "|    time_elapsed         | 95           |\n",
      "|    total_timesteps      | 30720        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0069623203 |\n",
      "|    clip_fraction        | 0.0704       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.08         |\n",
      "|    explained_variance   | -2.47        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 6.61         |\n",
      "|    n_updates            | 1650         |\n",
      "|    policy_gradient_loss | 0.000142     |\n",
      "|    std                  | 0.0815       |\n",
      "|    value_loss           | 38.5         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 274         |\n",
      "|    ep_rew_mean          | 1.9e+03     |\n",
      "| time/                   |             |\n",
      "|    fps                  | 320         |\n",
      "|    iterations           | 16          |\n",
      "|    time_elapsed         | 102         |\n",
      "|    total_timesteps      | 32768       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.036219925 |\n",
      "|    clip_fraction        | 0.0973      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.1         |\n",
      "|    explained_variance   | 0.0217      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 8.82        |\n",
      "|    n_updates            | 1660        |\n",
      "|    policy_gradient_loss | 0.00438     |\n",
      "|    std                  | 0.0797      |\n",
      "|    value_loss           | 3.97e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 273          |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 320          |\n",
      "|    iterations           | 17           |\n",
      "|    time_elapsed         | 108          |\n",
      "|    total_timesteps      | 34816        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0037218684 |\n",
      "|    clip_fraction        | 0.0459       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.11         |\n",
      "|    explained_variance   | 0.516        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.22e+03     |\n",
      "|    n_updates            | 1670         |\n",
      "|    policy_gradient_loss | 0.000549     |\n",
      "|    std                  | 0.0795       |\n",
      "|    value_loss           | 1.63e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 265          |\n",
      "|    ep_rew_mean          | 1.85e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 319          |\n",
      "|    iterations           | 18           |\n",
      "|    time_elapsed         | 115          |\n",
      "|    total_timesteps      | 36864        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0038857586 |\n",
      "|    clip_fraction        | 0.0148       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.11         |\n",
      "|    explained_variance   | 0.856        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.69e+03     |\n",
      "|    n_updates            | 1680         |\n",
      "|    policy_gradient_loss | -0.0034      |\n",
      "|    std                  | 0.0795       |\n",
      "|    value_loss           | 1.12e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 271          |\n",
      "|    ep_rew_mean          | 1.94e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 319          |\n",
      "|    iterations           | 19           |\n",
      "|    time_elapsed         | 121          |\n",
      "|    total_timesteps      | 38912        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0055123484 |\n",
      "|    clip_fraction        | 0.05         |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.11         |\n",
      "|    explained_variance   | 0.82         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.37e+03     |\n",
      "|    n_updates            | 1690         |\n",
      "|    policy_gradient_loss | -0.00904     |\n",
      "|    std                  | 0.0795       |\n",
      "|    value_loss           | 1.67e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 279         |\n",
      "|    ep_rew_mean          | 2.06e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 319         |\n",
      "|    iterations           | 20          |\n",
      "|    time_elapsed         | 128         |\n",
      "|    total_timesteps      | 40960       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006956038 |\n",
      "|    clip_fraction        | 0.0534      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.11        |\n",
      "|    explained_variance   | 0.752       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.63e+03    |\n",
      "|    n_updates            | 1700        |\n",
      "|    policy_gradient_loss | -0.00996    |\n",
      "|    std                  | 0.0795      |\n",
      "|    value_loss           | 8.76e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 284          |\n",
      "|    ep_rew_mean          | 2.13e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 318          |\n",
      "|    iterations           | 21           |\n",
      "|    time_elapsed         | 135          |\n",
      "|    total_timesteps      | 43008        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0059242593 |\n",
      "|    clip_fraction        | 0.0662       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.11         |\n",
      "|    explained_variance   | 0.546        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 657          |\n",
      "|    n_updates            | 1710         |\n",
      "|    policy_gradient_loss | -0.0105      |\n",
      "|    std                  | 0.0794       |\n",
      "|    value_loss           | 2.2e+03      |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 288        |\n",
      "|    ep_rew_mean          | 2.2e+03    |\n",
      "| time/                   |            |\n",
      "|    fps                  | 318        |\n",
      "|    iterations           | 22         |\n",
      "|    time_elapsed         | 141        |\n",
      "|    total_timesteps      | 45056      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.14676717 |\n",
      "|    clip_fraction        | 0.122      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.12       |\n",
      "|    explained_variance   | 0.797      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 196        |\n",
      "|    n_updates            | 1720       |\n",
      "|    policy_gradient_loss | -0.00527   |\n",
      "|    std                  | 0.0792     |\n",
      "|    value_loss           | 700        |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 300          |\n",
      "|    ep_rew_mean          | 2.37e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 317          |\n",
      "|    iterations           | 23           |\n",
      "|    time_elapsed         | 148          |\n",
      "|    total_timesteps      | 47104        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0129692275 |\n",
      "|    clip_fraction        | 0.141        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.12         |\n",
      "|    explained_variance   | 0.465        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 83.8         |\n",
      "|    n_updates            | 1730         |\n",
      "|    policy_gradient_loss | -0.00847     |\n",
      "|    std                  | 0.0791       |\n",
      "|    value_loss           | 246          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 304         |\n",
      "|    ep_rew_mean          | 2.43e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 317         |\n",
      "|    iterations           | 24          |\n",
      "|    time_elapsed         | 154         |\n",
      "|    total_timesteps      | 49152       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.050168563 |\n",
      "|    clip_fraction        | 0.172       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.12        |\n",
      "|    explained_variance   | 0.528       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 17.4        |\n",
      "|    n_updates            | 1740        |\n",
      "|    policy_gradient_loss | 0.0044      |\n",
      "|    std                  | 0.0783      |\n",
      "|    value_loss           | 54.4        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 308         |\n",
      "|    ep_rew_mean          | 2.49e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 317         |\n",
      "|    iterations           | 25          |\n",
      "|    time_elapsed         | 161         |\n",
      "|    total_timesteps      | 51200       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.016997702 |\n",
      "|    clip_fraction        | 0.0976      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.14        |\n",
      "|    explained_variance   | 0.556       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 11.5        |\n",
      "|    n_updates            | 1750        |\n",
      "|    policy_gradient_loss | 0.00193     |\n",
      "|    std                  | 0.0773      |\n",
      "|    value_loss           | 36.3        |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 316          |\n",
      "|    ep_rew_mean          | 2.61e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 316          |\n",
      "|    iterations           | 26           |\n",
      "|    time_elapsed         | 168          |\n",
      "|    total_timesteps      | 53248        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0023851134 |\n",
      "|    clip_fraction        | 0.112        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.15         |\n",
      "|    explained_variance   | 0.0971       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 20.8         |\n",
      "|    n_updates            | 1760         |\n",
      "|    policy_gradient_loss | 0.0102       |\n",
      "|    std                  | 0.0767       |\n",
      "|    value_loss           | 3.77e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 320         |\n",
      "|    ep_rew_mean          | 2.67e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 316         |\n",
      "|    iterations           | 27          |\n",
      "|    time_elapsed         | 174         |\n",
      "|    total_timesteps      | 55296       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.051382046 |\n",
      "|    clip_fraction        | 0.0777      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.15        |\n",
      "|    explained_variance   | 0.753       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 31.5        |\n",
      "|    n_updates            | 1770        |\n",
      "|    policy_gradient_loss | 0.00225     |\n",
      "|    std                  | 0.0763      |\n",
      "|    value_loss           | 50.5        |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 316       |\n",
      "|    ep_rew_mean          | 2.61e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 316       |\n",
      "|    iterations           | 28        |\n",
      "|    time_elapsed         | 181       |\n",
      "|    total_timesteps      | 57344     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 30.945034 |\n",
      "|    clip_fraction        | 0.278     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.15      |\n",
      "|    explained_variance   | 0.896     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 29        |\n",
      "|    n_updates            | 1780      |\n",
      "|    policy_gradient_loss | 0.0466    |\n",
      "|    std                  | 0.0763    |\n",
      "|    value_loss           | 129       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 316       |\n",
      "|    ep_rew_mean          | 2.61e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 316       |\n",
      "|    iterations           | 29        |\n",
      "|    time_elapsed         | 187       |\n",
      "|    total_timesteps      | 59392     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3.1255186 |\n",
      "|    clip_fraction        | 0.394     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.16      |\n",
      "|    explained_variance   | 0.0196    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 19.6      |\n",
      "|    n_updates            | 1790      |\n",
      "|    policy_gradient_loss | 0.0607    |\n",
      "|    std                  | 0.075     |\n",
      "|    value_loss           | 4.04e+03  |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 320         |\n",
      "|    ep_rew_mean          | 2.67e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 316         |\n",
      "|    iterations           | 30          |\n",
      "|    time_elapsed         | 194         |\n",
      "|    total_timesteps      | 61440       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.021030094 |\n",
      "|    clip_fraction        | 0.141       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.21        |\n",
      "|    explained_variance   | -2.93       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.41        |\n",
      "|    n_updates            | 1800        |\n",
      "|    policy_gradient_loss | 0.00114     |\n",
      "|    std                  | 0.0701      |\n",
      "|    value_loss           | 9.75        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 323         |\n",
      "|    ep_rew_mean          | 2.7e+03     |\n",
      "| time/                   |             |\n",
      "|    fps                  | 316         |\n",
      "|    iterations           | 31          |\n",
      "|    time_elapsed         | 200         |\n",
      "|    total_timesteps      | 63488       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.013464937 |\n",
      "|    clip_fraction        | 0.133       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.28        |\n",
      "|    explained_variance   | -0.104      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.53        |\n",
      "|    n_updates            | 1810        |\n",
      "|    policy_gradient_loss | -0.00967    |\n",
      "|    std                  | 0.0651      |\n",
      "|    value_loss           | 8.68        |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 335       |\n",
      "|    ep_rew_mean          | 2.86e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 315       |\n",
      "|    iterations           | 32        |\n",
      "|    time_elapsed         | 207       |\n",
      "|    total_timesteps      | 65536     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.5953965 |\n",
      "|    clip_fraction        | 0.192     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.32      |\n",
      "|    explained_variance   | 0.0966    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 50.6      |\n",
      "|    n_updates            | 1820      |\n",
      "|    policy_gradient_loss | 0.0176    |\n",
      "|    std                  | 0.064     |\n",
      "|    value_loss           | 79        |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 345         |\n",
      "|    ep_rew_mean          | 2.97e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 315         |\n",
      "|    iterations           | 33          |\n",
      "|    time_elapsed         | 214         |\n",
      "|    total_timesteps      | 67584       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.014929443 |\n",
      "|    clip_fraction        | 0.132       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.37        |\n",
      "|    explained_variance   | 0.0246      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.55        |\n",
      "|    n_updates            | 1830        |\n",
      "|    policy_gradient_loss | -0.0138     |\n",
      "|    std                  | 0.0593      |\n",
      "|    value_loss           | 5.38        |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 342        |\n",
      "|    ep_rew_mean          | 2.9e+03    |\n",
      "| time/                   |            |\n",
      "|    fps                  | 315        |\n",
      "|    iterations           | 34         |\n",
      "|    time_elapsed         | 220        |\n",
      "|    total_timesteps      | 69632      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.45714042 |\n",
      "|    clip_fraction        | 0.0911     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.42       |\n",
      "|    explained_variance   | 0.0162     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 2.15       |\n",
      "|    n_updates            | 1840       |\n",
      "|    policy_gradient_loss | 0.032      |\n",
      "|    std                  | 0.058      |\n",
      "|    value_loss           | 7.73e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 351       |\n",
      "|    ep_rew_mean          | 3e+03     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 315       |\n",
      "|    iterations           | 35        |\n",
      "|    time_elapsed         | 227       |\n",
      "|    total_timesteps      | 71680     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.5706775 |\n",
      "|    clip_fraction        | 0.155     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.43      |\n",
      "|    explained_variance   | 0.0839    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 192       |\n",
      "|    n_updates            | 1850      |\n",
      "|    policy_gradient_loss | 0.00456   |\n",
      "|    std                  | 0.0578    |\n",
      "|    value_loss           | 1.97e+04  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 341         |\n",
      "|    ep_rew_mean          | 2.85e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 315         |\n",
      "|    iterations           | 36          |\n",
      "|    time_elapsed         | 234         |\n",
      "|    total_timesteps      | 73728       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.013217899 |\n",
      "|    clip_fraction        | 0.0924      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.43        |\n",
      "|    explained_variance   | 0.403       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 89.3        |\n",
      "|    n_updates            | 1860        |\n",
      "|    policy_gradient_loss | -0.00415    |\n",
      "|    std                  | 0.058       |\n",
      "|    value_loss           | 9.07e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 300        |\n",
      "|    ep_rew_mean          | 2.26e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 315        |\n",
      "|    iterations           | 37         |\n",
      "|    time_elapsed         | 240        |\n",
      "|    total_timesteps      | 75776      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02990856 |\n",
      "|    clip_fraction        | 0.0748     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.43       |\n",
      "|    explained_variance   | 0.254      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 20.8       |\n",
      "|    n_updates            | 1870       |\n",
      "|    policy_gradient_loss | 0.0335     |\n",
      "|    std                  | 0.0574     |\n",
      "|    value_loss           | 9.53e+03   |\n",
      "----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 291      |\n",
      "|    ep_rew_mean          | 2.14e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 314      |\n",
      "|    iterations           | 38       |\n",
      "|    time_elapsed         | 247      |\n",
      "|    total_timesteps      | 77824    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 5.289793 |\n",
      "|    clip_fraction        | 0.192    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.44     |\n",
      "|    explained_variance   | 0.286    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 24.9     |\n",
      "|    n_updates            | 1880     |\n",
      "|    policy_gradient_loss | 0.0496   |\n",
      "|    std                  | 0.0569   |\n",
      "|    value_loss           | 1.72e+04 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 259       |\n",
      "|    ep_rew_mean          | 1.68e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 314       |\n",
      "|    iterations           | 39        |\n",
      "|    time_elapsed         | 253       |\n",
      "|    total_timesteps      | 79872     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 4.3500967 |\n",
      "|    clip_fraction        | 0.0885    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.47      |\n",
      "|    explained_variance   | -0.554    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.97      |\n",
      "|    n_updates            | 1890      |\n",
      "|    policy_gradient_loss | 0.0436    |\n",
      "|    std                  | 0.0545    |\n",
      "|    value_loss           | 6.31e+03  |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 251         |\n",
      "|    ep_rew_mean          | 1.57e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 40          |\n",
      "|    time_elapsed         | 260         |\n",
      "|    total_timesteps      | 81920       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.066983625 |\n",
      "|    clip_fraction        | 0.0351      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.5         |\n",
      "|    explained_variance   | 0.377       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 27.8        |\n",
      "|    n_updates            | 1900        |\n",
      "|    policy_gradient_loss | -0.00579    |\n",
      "|    std                  | 0.0533      |\n",
      "|    value_loss           | 1.16e+04    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 223        |\n",
      "|    ep_rew_mean          | 1.16e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 314        |\n",
      "|    iterations           | 41         |\n",
      "|    time_elapsed         | 266        |\n",
      "|    total_timesteps      | 83968      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.30596584 |\n",
      "|    clip_fraction        | 0.0353     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.52       |\n",
      "|    explained_variance   | 0.37       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 217        |\n",
      "|    n_updates            | 1910       |\n",
      "|    policy_gradient_loss | 0.00367    |\n",
      "|    std                  | 0.053      |\n",
      "|    value_loss           | 1.05e+04   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 190        |\n",
      "|    ep_rew_mean          | 698        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 314        |\n",
      "|    iterations           | 42         |\n",
      "|    time_elapsed         | 273        |\n",
      "|    total_timesteps      | 86016      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.54234976 |\n",
      "|    clip_fraction        | 0.213      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.52       |\n",
      "|    explained_variance   | 0.591      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 208        |\n",
      "|    n_updates            | 1920       |\n",
      "|    policy_gradient_loss | -0.0118    |\n",
      "|    std                  | 0.0526     |\n",
      "|    value_loss           | 1.43e+04   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 190          |\n",
      "|    ep_rew_mean          | 703          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 314          |\n",
      "|    iterations           | 43           |\n",
      "|    time_elapsed         | 279          |\n",
      "|    total_timesteps      | 88064        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0052699572 |\n",
      "|    clip_fraction        | 0.0467       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.55         |\n",
      "|    explained_variance   | 0.82         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.44         |\n",
      "|    n_updates            | 1930         |\n",
      "|    policy_gradient_loss | -0.000855    |\n",
      "|    std                  | 0.0504       |\n",
      "|    value_loss           | 8.42e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 190        |\n",
      "|    ep_rew_mean          | 708        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 314        |\n",
      "|    iterations           | 44         |\n",
      "|    time_elapsed         | 286        |\n",
      "|    total_timesteps      | 90112      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.11023162 |\n",
      "|    clip_fraction        | 0.176      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.58       |\n",
      "|    explained_variance   | 0.961      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 1.74       |\n",
      "|    n_updates            | 1940       |\n",
      "|    policy_gradient_loss | -0.00594   |\n",
      "|    std                  | 0.0491     |\n",
      "|    value_loss           | 914        |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 198       |\n",
      "|    ep_rew_mean          | 828       |\n",
      "| time/                   |           |\n",
      "|    fps                  | 314       |\n",
      "|    iterations           | 45        |\n",
      "|    time_elapsed         | 293       |\n",
      "|    total_timesteps      | 92160     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.5740595 |\n",
      "|    clip_fraction        | 0.11      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.63      |\n",
      "|    explained_variance   | 0.421     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 617       |\n",
      "|    n_updates            | 1950      |\n",
      "|    policy_gradient_loss | -0.000674 |\n",
      "|    std                  | 0.0463    |\n",
      "|    value_loss           | 7.34e+03  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 202      |\n",
      "|    ep_rew_mean          | 890      |\n",
      "| time/                   |          |\n",
      "|    fps                  | 314      |\n",
      "|    iterations           | 46       |\n",
      "|    time_elapsed         | 299      |\n",
      "|    total_timesteps      | 94208    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 1.702312 |\n",
      "|    clip_fraction        | 0.123    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.67     |\n",
      "|    explained_variance   | -2.06    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 2.43     |\n",
      "|    n_updates            | 1960     |\n",
      "|    policy_gradient_loss | 0.0492   |\n",
      "|    std                  | 0.0449   |\n",
      "|    value_loss           | 5.87e+03 |\n",
      "--------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 219        |\n",
      "|    ep_rew_mean          | 1.13e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 314        |\n",
      "|    iterations           | 47         |\n",
      "|    time_elapsed         | 305        |\n",
      "|    total_timesteps      | 96256      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.11944665 |\n",
      "|    clip_fraction        | 0.0836     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.72       |\n",
      "|    explained_variance   | -0.0135    |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 1.2        |\n",
      "|    n_updates            | 1970       |\n",
      "|    policy_gradient_loss | 0.027      |\n",
      "|    std                  | 0.0423     |\n",
      "|    value_loss           | 3.04       |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 214         |\n",
      "|    ep_rew_mean          | 1.08e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 48          |\n",
      "|    time_elapsed         | 312         |\n",
      "|    total_timesteps      | 98304       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.036599696 |\n",
      "|    clip_fraction        | 0.0816      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.78        |\n",
      "|    explained_variance   | -0.998      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.332       |\n",
      "|    n_updates            | 1980        |\n",
      "|    policy_gradient_loss | -0.00989    |\n",
      "|    std                  | 0.0394      |\n",
      "|    value_loss           | 2.47        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 210          |\n",
      "|    ep_rew_mean          | 1.02e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 314          |\n",
      "|    iterations           | 49           |\n",
      "|    time_elapsed         | 319          |\n",
      "|    total_timesteps      | 100352       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0066373246 |\n",
      "|    clip_fraction        | 0.114        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.83         |\n",
      "|    explained_variance   | 0.00301      |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 29.7         |\n",
      "|    n_updates            | 1990         |\n",
      "|    policy_gradient_loss | 0.00126      |\n",
      "|    std                  | 0.0382       |\n",
      "|    value_loss           | 2.04e+04     |\n",
      "------------------------------------------\n",
      "--- 322.04772686958313 seconds ---\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.5\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 274      |\n",
      "|    ep_rew_mean     | 1.78e+03 |\n",
      "| time/              |          |\n",
      "|    fps             | 506      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 4        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 308          |\n",
      "|    ep_rew_mean          | 2.25e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 386          |\n",
      "|    iterations           | 2            |\n",
      "|    time_elapsed         | 10           |\n",
      "|    total_timesteps      | 4096         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0027224135 |\n",
      "|    clip_fraction        | 0.0656       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.706        |\n",
      "|    explained_variance   | 0.0119       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 9.6          |\n",
      "|    n_updates            | 1520         |\n",
      "|    policy_gradient_loss | -0.00013     |\n",
      "|    std                  | 0.118        |\n",
      "|    value_loss           | 6.71e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 267        |\n",
      "|    ep_rew_mean          | 1.87e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 356        |\n",
      "|    iterations           | 3          |\n",
      "|    time_elapsed         | 17         |\n",
      "|    total_timesteps      | 6144       |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.17788914 |\n",
      "|    clip_fraction        | 0.127      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.721      |\n",
      "|    explained_variance   | 0.192      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 12.7       |\n",
      "|    n_updates            | 1530       |\n",
      "|    policy_gradient_loss | 0.0292     |\n",
      "|    std                  | 0.117      |\n",
      "|    value_loss           | 3.23e+03   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 223          |\n",
      "|    ep_rew_mean          | 1.38e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 344          |\n",
      "|    iterations           | 4            |\n",
      "|    time_elapsed         | 23           |\n",
      "|    total_timesteps      | 8192         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015048007 |\n",
      "|    clip_fraction        | 0.0151       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.726        |\n",
      "|    explained_variance   | 0.246        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.48e+03     |\n",
      "|    n_updates            | 1540         |\n",
      "|    policy_gradient_loss | 0.00319      |\n",
      "|    std                  | 0.117        |\n",
      "|    value_loss           | 1.47e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 205         |\n",
      "|    ep_rew_mean          | 1.17e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 338         |\n",
      "|    iterations           | 5           |\n",
      "|    time_elapsed         | 30          |\n",
      "|    total_timesteps      | 10240       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010498941 |\n",
      "|    clip_fraction        | 0.0276      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.726       |\n",
      "|    explained_variance   | 0.789       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 5.62e+04    |\n",
      "|    n_updates            | 1550        |\n",
      "|    policy_gradient_loss | 0.000119    |\n",
      "|    std                  | 0.117       |\n",
      "|    value_loss           | 1.55e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 213          |\n",
      "|    ep_rew_mean          | 1.27e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 333          |\n",
      "|    iterations           | 6            |\n",
      "|    time_elapsed         | 36           |\n",
      "|    total_timesteps      | 12288        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0043118126 |\n",
      "|    clip_fraction        | 0.0496       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.726        |\n",
      "|    explained_variance   | 0.82         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.56e+03     |\n",
      "|    n_updates            | 1560         |\n",
      "|    policy_gradient_loss | -0.00848     |\n",
      "|    std                  | 0.117        |\n",
      "|    value_loss           | 2.29e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 212         |\n",
      "|    ep_rew_mean          | 1.28e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 329         |\n",
      "|    iterations           | 7           |\n",
      "|    time_elapsed         | 43          |\n",
      "|    total_timesteps      | 14336       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008632775 |\n",
      "|    clip_fraction        | 0.0241      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.726       |\n",
      "|    explained_variance   | 0.731       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 57          |\n",
      "|    n_updates            | 1570        |\n",
      "|    policy_gradient_loss | 0.000487    |\n",
      "|    std                  | 0.117       |\n",
      "|    value_loss           | 4.2e+03     |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 212          |\n",
      "|    ep_rew_mean          | 1.29e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 326          |\n",
      "|    iterations           | 8            |\n",
      "|    time_elapsed         | 50           |\n",
      "|    total_timesteps      | 16384        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0073547605 |\n",
      "|    clip_fraction        | 0.0646       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.727        |\n",
      "|    explained_variance   | 0.891        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 282          |\n",
      "|    n_updates            | 1580         |\n",
      "|    policy_gradient_loss | 0.01         |\n",
      "|    std                  | 0.117        |\n",
      "|    value_loss           | 3.37e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 214        |\n",
      "|    ep_rew_mean          | 1.28e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 323        |\n",
      "|    iterations           | 9          |\n",
      "|    time_elapsed         | 56         |\n",
      "|    total_timesteps      | 18432      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.07203703 |\n",
      "|    clip_fraction        | 0.0847     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.729      |\n",
      "|    explained_variance   | 0.938      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 611        |\n",
      "|    n_updates            | 1590       |\n",
      "|    policy_gradient_loss | 0.00821    |\n",
      "|    std                  | 0.117      |\n",
      "|    value_loss           | 3.36e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 203         |\n",
      "|    ep_rew_mean          | 1.14e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 322         |\n",
      "|    iterations           | 10          |\n",
      "|    time_elapsed         | 63          |\n",
      "|    total_timesteps      | 20480       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.109954804 |\n",
      "|    clip_fraction        | 0.0609      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.729       |\n",
      "|    explained_variance   | 0.593       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 50.5        |\n",
      "|    n_updates            | 1600        |\n",
      "|    policy_gradient_loss | 0.00986     |\n",
      "|    std                  | 0.117       |\n",
      "|    value_loss           | 8.24e+03    |\n",
      "-----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 188      |\n",
      "|    ep_rew_mean          | 993      |\n",
      "| time/                   |          |\n",
      "|    fps                  | 320      |\n",
      "|    iterations           | 11       |\n",
      "|    time_elapsed         | 70       |\n",
      "|    total_timesteps      | 22528    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 0.341569 |\n",
      "|    clip_fraction        | 0.112    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.731    |\n",
      "|    explained_variance   | 0.963    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 18.9     |\n",
      "|    n_updates            | 1610     |\n",
      "|    policy_gradient_loss | 0.00761  |\n",
      "|    std                  | 0.116    |\n",
      "|    value_loss           | 570      |\n",
      "--------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 187         |\n",
      "|    ep_rew_mean          | 985         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 319         |\n",
      "|    iterations           | 12          |\n",
      "|    time_elapsed         | 76          |\n",
      "|    total_timesteps      | 24576       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004257901 |\n",
      "|    clip_fraction        | 0.0466      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.731       |\n",
      "|    explained_variance   | 0.75        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 9.21e+03    |\n",
      "|    n_updates            | 1620        |\n",
      "|    policy_gradient_loss | -0.0075     |\n",
      "|    std                  | 0.116       |\n",
      "|    value_loss           | 1.58e+04    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 193        |\n",
      "|    ep_rew_mean          | 1.06e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 318        |\n",
      "|    iterations           | 13         |\n",
      "|    time_elapsed         | 83         |\n",
      "|    total_timesteps      | 26624      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.21613826 |\n",
      "|    clip_fraction        | 0.0677     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.731      |\n",
      "|    explained_variance   | 0.965      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 1.09e+03   |\n",
      "|    n_updates            | 1630       |\n",
      "|    policy_gradient_loss | 0.000124   |\n",
      "|    std                  | 0.116      |\n",
      "|    value_loss           | 1.59e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 197         |\n",
      "|    ep_rew_mean          | 1.1e+03     |\n",
      "| time/                   |             |\n",
      "|    fps                  | 318         |\n",
      "|    iterations           | 14          |\n",
      "|    time_elapsed         | 90          |\n",
      "|    total_timesteps      | 28672       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009593974 |\n",
      "|    clip_fraction        | 0.067       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.732       |\n",
      "|    explained_variance   | 0.912       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 699         |\n",
      "|    n_updates            | 1640        |\n",
      "|    policy_gradient_loss | 0.0158      |\n",
      "|    std                  | 0.116       |\n",
      "|    value_loss           | 6.29e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 200          |\n",
      "|    ep_rew_mean          | 1.16e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 317          |\n",
      "|    iterations           | 15           |\n",
      "|    time_elapsed         | 96           |\n",
      "|    total_timesteps      | 30720        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0039554397 |\n",
      "|    clip_fraction        | 0.0285       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.733        |\n",
      "|    explained_variance   | 0.49         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.73e+03     |\n",
      "|    n_updates            | 1650         |\n",
      "|    policy_gradient_loss | -0.00165     |\n",
      "|    std                  | 0.116        |\n",
      "|    value_loss           | 1.74e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 193          |\n",
      "|    ep_rew_mean          | 1.08e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 317          |\n",
      "|    iterations           | 16           |\n",
      "|    time_elapsed         | 103          |\n",
      "|    total_timesteps      | 32768        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0031412616 |\n",
      "|    clip_fraction        | 0.0351       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.733        |\n",
      "|    explained_variance   | 0.887        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.87e+03     |\n",
      "|    n_updates            | 1660         |\n",
      "|    policy_gradient_loss | 0.000718     |\n",
      "|    std                  | 0.116        |\n",
      "|    value_loss           | 7.25e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 198          |\n",
      "|    ep_rew_mean          | 1.13e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 316          |\n",
      "|    iterations           | 17           |\n",
      "|    time_elapsed         | 109          |\n",
      "|    total_timesteps      | 34816        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0029042466 |\n",
      "|    clip_fraction        | 0.0447       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.733        |\n",
      "|    explained_variance   | 0.886        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.74e+03     |\n",
      "|    n_updates            | 1670         |\n",
      "|    policy_gradient_loss | 4.92e-05     |\n",
      "|    std                  | 0.116        |\n",
      "|    value_loss           | 4.84e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 195          |\n",
      "|    ep_rew_mean          | 1.09e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 316          |\n",
      "|    iterations           | 18           |\n",
      "|    time_elapsed         | 116          |\n",
      "|    total_timesteps      | 36864        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0023853478 |\n",
      "|    clip_fraction        | 0.0362       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.734        |\n",
      "|    explained_variance   | 0.693        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.03e+04     |\n",
      "|    n_updates            | 1680         |\n",
      "|    policy_gradient_loss | -0.00749     |\n",
      "|    std                  | 0.116        |\n",
      "|    value_loss           | 2.2e+04      |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 189          |\n",
      "|    ep_rew_mean          | 1.06e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 316          |\n",
      "|    iterations           | 19           |\n",
      "|    time_elapsed         | 122          |\n",
      "|    total_timesteps      | 38912        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0059387013 |\n",
      "|    clip_fraction        | 0.0389       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.734        |\n",
      "|    explained_variance   | 0.863        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.51e+03     |\n",
      "|    n_updates            | 1690         |\n",
      "|    policy_gradient_loss | 0.00787      |\n",
      "|    std                  | 0.116        |\n",
      "|    value_loss           | 1.01e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 201         |\n",
      "|    ep_rew_mean          | 1.21e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 315         |\n",
      "|    iterations           | 20          |\n",
      "|    time_elapsed         | 129         |\n",
      "|    total_timesteps      | 40960       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005037585 |\n",
      "|    clip_fraction        | 0.0153      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.734       |\n",
      "|    explained_variance   | 0.84        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.11e+03    |\n",
      "|    n_updates            | 1700        |\n",
      "|    policy_gradient_loss | 0.00242     |\n",
      "|    std                  | 0.116       |\n",
      "|    value_loss           | 2.12e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 208          |\n",
      "|    ep_rew_mean          | 1.3e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 315          |\n",
      "|    iterations           | 21           |\n",
      "|    time_elapsed         | 136          |\n",
      "|    total_timesteps      | 43008        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0026121866 |\n",
      "|    clip_fraction        | 0.0312       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.735        |\n",
      "|    explained_variance   | 0.824        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.34e+03     |\n",
      "|    n_updates            | 1710         |\n",
      "|    policy_gradient_loss | -0.00183     |\n",
      "|    std                  | 0.116        |\n",
      "|    value_loss           | 9.91e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 209        |\n",
      "|    ep_rew_mean          | 1.31e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 315        |\n",
      "|    iterations           | 22         |\n",
      "|    time_elapsed         | 142        |\n",
      "|    total_timesteps      | 45056      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02014736 |\n",
      "|    clip_fraction        | 0.0568     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.738      |\n",
      "|    explained_variance   | 0.765      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 126        |\n",
      "|    n_updates            | 1720       |\n",
      "|    policy_gradient_loss | -0.00127   |\n",
      "|    std                  | 0.115      |\n",
      "|    value_loss           | 3.43e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 209         |\n",
      "|    ep_rew_mean          | 1.32e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 315         |\n",
      "|    iterations           | 23          |\n",
      "|    time_elapsed         | 149         |\n",
      "|    total_timesteps      | 47104       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003478502 |\n",
      "|    clip_fraction        | 0.0388      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.742       |\n",
      "|    explained_variance   | 0.873       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.39e+04    |\n",
      "|    n_updates            | 1730        |\n",
      "|    policy_gradient_loss | 0.00365     |\n",
      "|    std                  | 0.115       |\n",
      "|    value_loss           | 7.06e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 185        |\n",
      "|    ep_rew_mean          | 1.06e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 315        |\n",
      "|    iterations           | 24         |\n",
      "|    time_elapsed         | 155        |\n",
      "|    total_timesteps      | 49152      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.13284256 |\n",
      "|    clip_fraction        | 0.0482     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.744      |\n",
      "|    explained_variance   | 0.982      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 126        |\n",
      "|    n_updates            | 1740       |\n",
      "|    policy_gradient_loss | 0.0132     |\n",
      "|    std                  | 0.115      |\n",
      "|    value_loss           | 425        |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 175          |\n",
      "|    ep_rew_mean          | 933          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 315          |\n",
      "|    iterations           | 25           |\n",
      "|    time_elapsed         | 162          |\n",
      "|    total_timesteps      | 51200        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0044932575 |\n",
      "|    clip_fraction        | 0.0446       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.747        |\n",
      "|    explained_variance   | 0.821        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.37e+03     |\n",
      "|    n_updates            | 1750         |\n",
      "|    policy_gradient_loss | -0.00149     |\n",
      "|    std                  | 0.115        |\n",
      "|    value_loss           | 1.49e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 161         |\n",
      "|    ep_rew_mean          | 791         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 26          |\n",
      "|    time_elapsed         | 169         |\n",
      "|    total_timesteps      | 53248       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008835901 |\n",
      "|    clip_fraction        | 0.05        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.748       |\n",
      "|    explained_variance   | 0.94        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 788         |\n",
      "|    n_updates            | 1760        |\n",
      "|    policy_gradient_loss | -0.00348    |\n",
      "|    std                  | 0.115       |\n",
      "|    value_loss           | 7e+03       |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 159         |\n",
      "|    ep_rew_mean          | 747         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 27          |\n",
      "|    time_elapsed         | 175         |\n",
      "|    total_timesteps      | 55296       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009492062 |\n",
      "|    clip_fraction        | 0.033       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.748       |\n",
      "|    explained_variance   | 0.915       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.25e+04    |\n",
      "|    n_updates            | 1770        |\n",
      "|    policy_gradient_loss | -0.00422    |\n",
      "|    std                  | 0.115       |\n",
      "|    value_loss           | 2.25e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 145         |\n",
      "|    ep_rew_mean          | 585         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 28          |\n",
      "|    time_elapsed         | 182         |\n",
      "|    total_timesteps      | 57344       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.019416155 |\n",
      "|    clip_fraction        | 0.0438      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.748       |\n",
      "|    explained_variance   | 0.943       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.71e+04    |\n",
      "|    n_updates            | 1780        |\n",
      "|    policy_gradient_loss | -0.00169    |\n",
      "|    std                  | 0.115       |\n",
      "|    value_loss           | 1.36e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 145          |\n",
      "|    ep_rew_mean          | 589          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 314          |\n",
      "|    iterations           | 29           |\n",
      "|    time_elapsed         | 188          |\n",
      "|    total_timesteps      | 59392        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0068110893 |\n",
      "|    clip_fraction        | 0.0519       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.748        |\n",
      "|    explained_variance   | 0.856        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.04e+04     |\n",
      "|    n_updates            | 1790         |\n",
      "|    policy_gradient_loss | -0.00374     |\n",
      "|    std                  | 0.114        |\n",
      "|    value_loss           | 1.93e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 153         |\n",
      "|    ep_rew_mean          | 676         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 30          |\n",
      "|    time_elapsed         | 195         |\n",
      "|    total_timesteps      | 61440       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004688157 |\n",
      "|    clip_fraction        | 0.0475      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.749       |\n",
      "|    explained_variance   | 0.901       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.9e+03     |\n",
      "|    n_updates            | 1800        |\n",
      "|    policy_gradient_loss | -0.00419    |\n",
      "|    std                  | 0.114       |\n",
      "|    value_loss           | 1.05e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 161         |\n",
      "|    ep_rew_mean          | 776         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 31          |\n",
      "|    time_elapsed         | 202         |\n",
      "|    total_timesteps      | 63488       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011971602 |\n",
      "|    clip_fraction        | 0.108       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.753       |\n",
      "|    explained_variance   | 0.416       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 86          |\n",
      "|    n_updates            | 1810        |\n",
      "|    policy_gradient_loss | 0.00264     |\n",
      "|    std                  | 0.114       |\n",
      "|    value_loss           | 376         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 175        |\n",
      "|    ep_rew_mean          | 937        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 313        |\n",
      "|    iterations           | 32         |\n",
      "|    time_elapsed         | 208        |\n",
      "|    total_timesteps      | 65536      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02014458 |\n",
      "|    clip_fraction        | 0.0812     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.754      |\n",
      "|    explained_variance   | 0.722      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 38.4       |\n",
      "|    n_updates            | 1820       |\n",
      "|    policy_gradient_loss | 0.0012     |\n",
      "|    std                  | 0.114      |\n",
      "|    value_loss           | 1.18e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 190         |\n",
      "|    ep_rew_mean          | 1.12e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 33          |\n",
      "|    time_elapsed         | 215         |\n",
      "|    total_timesteps      | 67584       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007467859 |\n",
      "|    clip_fraction        | 0.0891      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.754       |\n",
      "|    explained_variance   | 0.932       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 11.2        |\n",
      "|    n_updates            | 1830        |\n",
      "|    policy_gradient_loss | -0.00119    |\n",
      "|    std                  | 0.114       |\n",
      "|    value_loss           | 270         |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 208          |\n",
      "|    ep_rew_mean          | 1.33e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 313          |\n",
      "|    iterations           | 34           |\n",
      "|    time_elapsed         | 221          |\n",
      "|    total_timesteps      | 69632        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0051582265 |\n",
      "|    clip_fraction        | 0.0958       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.763        |\n",
      "|    explained_variance   | -0.86        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.71         |\n",
      "|    n_updates            | 1840         |\n",
      "|    policy_gradient_loss | 0.00821      |\n",
      "|    std                  | 0.112        |\n",
      "|    value_loss           | 36.8         |\n",
      "------------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 219       |\n",
      "|    ep_rew_mean          | 1.45e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 313       |\n",
      "|    iterations           | 35        |\n",
      "|    time_elapsed         | 228       |\n",
      "|    total_timesteps      | 71680     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.0749252 |\n",
      "|    clip_fraction        | 0.0956    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.776     |\n",
      "|    explained_variance   | 0.971     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 83        |\n",
      "|    n_updates            | 1850      |\n",
      "|    policy_gradient_loss | 0.0408    |\n",
      "|    std                  | 0.111     |\n",
      "|    value_loss           | 399       |\n",
      "---------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 234          |\n",
      "|    ep_rew_mean          | 1.62e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 313          |\n",
      "|    iterations           | 36           |\n",
      "|    time_elapsed         | 235          |\n",
      "|    total_timesteps      | 73728        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0131429965 |\n",
      "|    clip_fraction        | 0.168        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.785        |\n",
      "|    explained_variance   | 0.889        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 18.4         |\n",
      "|    n_updates            | 1860         |\n",
      "|    policy_gradient_loss | 0.0181       |\n",
      "|    std                  | 0.11         |\n",
      "|    value_loss           | 504          |\n",
      "------------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 240       |\n",
      "|    ep_rew_mean          | 1.69e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 313       |\n",
      "|    iterations           | 37        |\n",
      "|    time_elapsed         | 241       |\n",
      "|    total_timesteps      | 75776     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.1518465 |\n",
      "|    clip_fraction        | 0.16      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.791     |\n",
      "|    explained_variance   | 0.273     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 9.03      |\n",
      "|    n_updates            | 1870      |\n",
      "|    policy_gradient_loss | 0.0246    |\n",
      "|    std                  | 0.109     |\n",
      "|    value_loss           | 118       |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 251         |\n",
      "|    ep_rew_mean          | 1.81e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 313         |\n",
      "|    iterations           | 38          |\n",
      "|    time_elapsed         | 248         |\n",
      "|    total_timesteps      | 77824       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.073156595 |\n",
      "|    clip_fraction        | 0.208       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.799       |\n",
      "|    explained_variance   | 0.842       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 210         |\n",
      "|    n_updates            | 1880        |\n",
      "|    policy_gradient_loss | 0.0112      |\n",
      "|    std                  | 0.108       |\n",
      "|    value_loss           | 4.19e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 261          |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 313          |\n",
      "|    iterations           | 39           |\n",
      "|    time_elapsed         | 254          |\n",
      "|    total_timesteps      | 79872        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0071015405 |\n",
      "|    clip_fraction        | 0.0589       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.804        |\n",
      "|    explained_variance   | 0.817        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 5.65e+04     |\n",
      "|    n_updates            | 1890         |\n",
      "|    policy_gradient_loss | 0.00896      |\n",
      "|    std                  | 0.108        |\n",
      "|    value_loss           | 4.07e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 276          |\n",
      "|    ep_rew_mean          | 2.14e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 313          |\n",
      "|    iterations           | 40           |\n",
      "|    time_elapsed         | 261          |\n",
      "|    total_timesteps      | 81920        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0025748108 |\n",
      "|    clip_fraction        | 0.0298       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.806        |\n",
      "|    explained_variance   | 0.875        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 6.81e+03     |\n",
      "|    n_updates            | 1900         |\n",
      "|    policy_gradient_loss | 0.00155      |\n",
      "|    std                  | 0.108        |\n",
      "|    value_loss           | 4.31e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 291         |\n",
      "|    ep_rew_mean          | 2.31e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 313         |\n",
      "|    iterations           | 41          |\n",
      "|    time_elapsed         | 267         |\n",
      "|    total_timesteps      | 83968       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.022461694 |\n",
      "|    clip_fraction        | 0.165       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.817       |\n",
      "|    explained_variance   | -1.04       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 24.9        |\n",
      "|    n_updates            | 1910        |\n",
      "|    policy_gradient_loss | -0.00896    |\n",
      "|    std                  | 0.106       |\n",
      "|    value_loss           | 80.8        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 301          |\n",
      "|    ep_rew_mean          | 2.42e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 313          |\n",
      "|    iterations           | 42           |\n",
      "|    time_elapsed         | 274          |\n",
      "|    total_timesteps      | 86016        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0040264875 |\n",
      "|    clip_fraction        | 0.12         |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.828        |\n",
      "|    explained_variance   | 0.27         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 33.7         |\n",
      "|    n_updates            | 1920         |\n",
      "|    policy_gradient_loss | 0.00482      |\n",
      "|    std                  | 0.105        |\n",
      "|    value_loss           | 76.7         |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 309          |\n",
      "|    ep_rew_mean          | 2.52e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 313          |\n",
      "|    iterations           | 43           |\n",
      "|    time_elapsed         | 281          |\n",
      "|    total_timesteps      | 88064        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0075631235 |\n",
      "|    clip_fraction        | 0.0736       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.844        |\n",
      "|    explained_variance   | -0.00534     |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 32.5         |\n",
      "|    n_updates            | 1930         |\n",
      "|    policy_gradient_loss | -0.00709     |\n",
      "|    std                  | 0.103        |\n",
      "|    value_loss           | 7.49e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 316        |\n",
      "|    ep_rew_mean          | 2.6e+03    |\n",
      "| time/                   |            |\n",
      "|    fps                  | 313        |\n",
      "|    iterations           | 44         |\n",
      "|    time_elapsed         | 287        |\n",
      "|    total_timesteps      | 90112      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.04620501 |\n",
      "|    clip_fraction        | 0.0157     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.856      |\n",
      "|    explained_variance   | 0.869      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.12e+03   |\n",
      "|    n_updates            | 1940       |\n",
      "|    policy_gradient_loss | 0.0048     |\n",
      "|    std                  | 0.103      |\n",
      "|    value_loss           | 4.64e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 327         |\n",
      "|    ep_rew_mean          | 2.73e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 313         |\n",
      "|    iterations           | 45          |\n",
      "|    time_elapsed         | 294         |\n",
      "|    total_timesteps      | 92160       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006058362 |\n",
      "|    clip_fraction        | 0.0994      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.862       |\n",
      "|    explained_variance   | 0.793       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 19.4        |\n",
      "|    n_updates            | 1950        |\n",
      "|    policy_gradient_loss | 0.00491     |\n",
      "|    std                  | 0.102       |\n",
      "|    value_loss           | 3.83e+03    |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 288       |\n",
      "|    ep_rew_mean          | 2.27e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 312       |\n",
      "|    iterations           | 46        |\n",
      "|    time_elapsed         | 300       |\n",
      "|    total_timesteps      | 94208     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.6312582 |\n",
      "|    clip_fraction        | 0.119     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.872     |\n",
      "|    explained_variance   | 0.83      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 676       |\n",
      "|    n_updates            | 1960      |\n",
      "|    policy_gradient_loss | 0.028     |\n",
      "|    std                  | 0.101     |\n",
      "|    value_loss           | 763       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 238       |\n",
      "|    ep_rew_mean          | 1.68e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 313       |\n",
      "|    iterations           | 47        |\n",
      "|    time_elapsed         | 307       |\n",
      "|    total_timesteps      | 96256     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3.7975183 |\n",
      "|    clip_fraction        | 0.0773    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.878     |\n",
      "|    explained_variance   | 0.695     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 243       |\n",
      "|    n_updates            | 1970      |\n",
      "|    policy_gradient_loss | 0.00279   |\n",
      "|    std                  | 0.101     |\n",
      "|    value_loss           | 1.46e+04  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 222       |\n",
      "|    ep_rew_mean          | 1.5e+03   |\n",
      "| time/                   |           |\n",
      "|    fps                  | 313       |\n",
      "|    iterations           | 48        |\n",
      "|    time_elapsed         | 314       |\n",
      "|    total_timesteps      | 98304     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.0325811 |\n",
      "|    clip_fraction        | 0.0752    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.877     |\n",
      "|    explained_variance   | 0.935     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.72e+03  |\n",
      "|    n_updates            | 1980      |\n",
      "|    policy_gradient_loss | 0.0225    |\n",
      "|    std                  | 0.101     |\n",
      "|    value_loss           | 6.88e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 169       |\n",
      "|    ep_rew_mean          | 889       |\n",
      "| time/                   |           |\n",
      "|    fps                  | 313       |\n",
      "|    iterations           | 49        |\n",
      "|    time_elapsed         | 320       |\n",
      "|    total_timesteps      | 100352    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.9698944 |\n",
      "|    clip_fraction        | 0.0861    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.88      |\n",
      "|    explained_variance   | 0.977     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 176       |\n",
      "|    n_updates            | 1990      |\n",
      "|    policy_gradient_loss | 0.0253    |\n",
      "|    std                  | 0.1       |\n",
      "|    value_loss           | 1.69e+03  |\n",
      "---------------------------------------\n",
      "--- 323.4771258831024 seconds ---\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.9\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 410      |\n",
      "|    ep_rew_mean     | 3.67e+03 |\n",
      "| time/              |          |\n",
      "|    fps             | 491      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 4        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 371          |\n",
      "|    ep_rew_mean          | 3.21e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 382          |\n",
      "|    iterations           | 2            |\n",
      "|    time_elapsed         | 10           |\n",
      "|    total_timesteps      | 4096         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0068268664 |\n",
      "|    clip_fraction        | 0.049        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.702        |\n",
      "|    explained_variance   | 0.367        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.51         |\n",
      "|    n_updates            | 1520         |\n",
      "|    policy_gradient_loss | 0.000873     |\n",
      "|    std                  | 0.118        |\n",
      "|    value_loss           | 19           |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 384         |\n",
      "|    ep_rew_mean          | 3.35e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 357         |\n",
      "|    iterations           | 3           |\n",
      "|    time_elapsed         | 17          |\n",
      "|    total_timesteps      | 6144        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002668859 |\n",
      "|    clip_fraction        | 0.0114      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.716       |\n",
      "|    explained_variance   | 0.607       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 576         |\n",
      "|    n_updates            | 1530        |\n",
      "|    policy_gradient_loss | -0.000832   |\n",
      "|    std                  | 0.118       |\n",
      "|    value_loss           | 5.02e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 390        |\n",
      "|    ep_rew_mean          | 3.43e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 346        |\n",
      "|    iterations           | 4          |\n",
      "|    time_elapsed         | 23         |\n",
      "|    total_timesteps      | 8192       |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.17818353 |\n",
      "|    clip_fraction        | 0.0667     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.725      |\n",
      "|    explained_variance   | 0.177      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 17.6       |\n",
      "|    n_updates            | 1540       |\n",
      "|    policy_gradient_loss | 0.0198     |\n",
      "|    std                  | 0.116      |\n",
      "|    value_loss           | 43         |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 394         |\n",
      "|    ep_rew_mean          | 3.48e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 336         |\n",
      "|    iterations           | 5           |\n",
      "|    time_elapsed         | 30          |\n",
      "|    total_timesteps      | 10240       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.095713854 |\n",
      "|    clip_fraction        | 0.0912      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.752       |\n",
      "|    explained_variance   | 0.288       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 14.5        |\n",
      "|    n_updates            | 1550        |\n",
      "|    policy_gradient_loss | 0.00118     |\n",
      "|    std                  | 0.112       |\n",
      "|    value_loss           | 21.1        |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 397        |\n",
      "|    ep_rew_mean          | 3.51e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 333        |\n",
      "|    iterations           | 6          |\n",
      "|    time_elapsed         | 36         |\n",
      "|    total_timesteps      | 12288      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.43601316 |\n",
      "|    clip_fraction        | 0.066      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.795      |\n",
      "|    explained_variance   | 0.338      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.14       |\n",
      "|    n_updates            | 1560       |\n",
      "|    policy_gradient_loss | -0.00711   |\n",
      "|    std                  | 0.107      |\n",
      "|    value_loss           | 14.9       |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 388         |\n",
      "|    ep_rew_mean          | 3.41e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 329         |\n",
      "|    iterations           | 7           |\n",
      "|    time_elapsed         | 43          |\n",
      "|    total_timesteps      | 14336       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.030694531 |\n",
      "|    clip_fraction        | 0.0839      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.817       |\n",
      "|    explained_variance   | 0.431       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.32        |\n",
      "|    n_updates            | 1570        |\n",
      "|    policy_gradient_loss | 0.0259      |\n",
      "|    std                  | 0.106       |\n",
      "|    value_loss           | 46          |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 391         |\n",
      "|    ep_rew_mean          | 3.45e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 327         |\n",
      "|    iterations           | 8           |\n",
      "|    time_elapsed         | 49          |\n",
      "|    total_timesteps      | 16384       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.030813953 |\n",
      "|    clip_fraction        | 0.0413      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.823       |\n",
      "|    explained_variance   | 0.594       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.62e+03    |\n",
      "|    n_updates            | 1580        |\n",
      "|    policy_gradient_loss | -0.00309    |\n",
      "|    std                  | 0.106       |\n",
      "|    value_loss           | 1.33e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 393        |\n",
      "|    ep_rew_mean          | 3.48e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 325        |\n",
      "|    iterations           | 9          |\n",
      "|    time_elapsed         | 56         |\n",
      "|    total_timesteps      | 18432      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.55859953 |\n",
      "|    clip_fraction        | 0.0395     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.837      |\n",
      "|    explained_variance   | 0.122      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 15.4       |\n",
      "|    n_updates            | 1590       |\n",
      "|    policy_gradient_loss | 0.0262     |\n",
      "|    std                  | 0.104      |\n",
      "|    value_loss           | 23.3       |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 387        |\n",
      "|    ep_rew_mean          | 3.42e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 324        |\n",
      "|    iterations           | 10         |\n",
      "|    time_elapsed         | 63         |\n",
      "|    total_timesteps      | 20480      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.07255541 |\n",
      "|    clip_fraction        | 0.134      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.867      |\n",
      "|    explained_variance   | 0.436      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 8.99       |\n",
      "|    n_updates            | 1600       |\n",
      "|    policy_gradient_loss | 0.00472    |\n",
      "|    std                  | 0.0995     |\n",
      "|    value_loss           | 13.9       |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 376        |\n",
      "|    ep_rew_mean          | 3.31e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 323        |\n",
      "|    iterations           | 11         |\n",
      "|    time_elapsed         | 69         |\n",
      "|    total_timesteps      | 22528      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.03224013 |\n",
      "|    clip_fraction        | 0.0779     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.892      |\n",
      "|    explained_variance   | 0.459      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 5.98e+03   |\n",
      "|    n_updates            | 1610       |\n",
      "|    policy_gradient_loss | -0.0111    |\n",
      "|    std                  | 0.099      |\n",
      "|    value_loss           | 1.62e+03   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 357        |\n",
      "|    ep_rew_mean          | 3.1e+03    |\n",
      "| time/                   |            |\n",
      "|    fps                  | 321        |\n",
      "|    iterations           | 12         |\n",
      "|    time_elapsed         | 76         |\n",
      "|    total_timesteps      | 24576      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.09172061 |\n",
      "|    clip_fraction        | 0.0982     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.895      |\n",
      "|    explained_variance   | 0.916      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 208        |\n",
      "|    n_updates            | 1620       |\n",
      "|    policy_gradient_loss | 0.0266     |\n",
      "|    std                  | 0.0989     |\n",
      "|    value_loss           | 561        |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 361       |\n",
      "|    ep_rew_mean          | 3.15e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 321       |\n",
      "|    iterations           | 13        |\n",
      "|    time_elapsed         | 82        |\n",
      "|    total_timesteps      | 26624     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.2986917 |\n",
      "|    clip_fraction        | 0.148     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.896     |\n",
      "|    explained_variance   | 0.832     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 13.4      |\n",
      "|    n_updates            | 1630      |\n",
      "|    policy_gradient_loss | 0.0048    |\n",
      "|    std                  | 0.0986    |\n",
      "|    value_loss           | 1.52e+03  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 355         |\n",
      "|    ep_rew_mean          | 3.07e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 320         |\n",
      "|    iterations           | 14          |\n",
      "|    time_elapsed         | 89          |\n",
      "|    total_timesteps      | 28672       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007305996 |\n",
      "|    clip_fraction        | 0.137       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.915       |\n",
      "|    explained_variance   | 0.132       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.97        |\n",
      "|    n_updates            | 1640        |\n",
      "|    policy_gradient_loss | 0.0108      |\n",
      "|    std                  | 0.0956      |\n",
      "|    value_loss           | 13.8        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 347          |\n",
      "|    ep_rew_mean          | 2.98e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 319          |\n",
      "|    iterations           | 15           |\n",
      "|    time_elapsed         | 96           |\n",
      "|    total_timesteps      | 30720        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0013748652 |\n",
      "|    clip_fraction        | 0.0304       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.931        |\n",
      "|    explained_variance   | 0.713        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.47e+03     |\n",
      "|    n_updates            | 1650         |\n",
      "|    policy_gradient_loss | 0.00277      |\n",
      "|    std                  | 0.0953       |\n",
      "|    value_loss           | 5.65e+03     |\n",
      "------------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 340       |\n",
      "|    ep_rew_mean          | 2.9e+03   |\n",
      "| time/                   |           |\n",
      "|    fps                  | 318       |\n",
      "|    iterations           | 16        |\n",
      "|    time_elapsed         | 102       |\n",
      "|    total_timesteps      | 32768     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.0309855 |\n",
      "|    clip_fraction        | 0.0605    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.932     |\n",
      "|    explained_variance   | 0.927     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.24e+03  |\n",
      "|    n_updates            | 1660      |\n",
      "|    policy_gradient_loss | 0.0303    |\n",
      "|    std                  | 0.0952    |\n",
      "|    value_loss           | 2.63e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 322        |\n",
      "|    ep_rew_mean          | 2.7e+03    |\n",
      "| time/                   |            |\n",
      "|    fps                  | 318        |\n",
      "|    iterations           | 17         |\n",
      "|    time_elapsed         | 109        |\n",
      "|    total_timesteps      | 34816      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.19822335 |\n",
      "|    clip_fraction        | 0.131      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.934      |\n",
      "|    explained_variance   | 0.969      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 423        |\n",
      "|    n_updates            | 1670       |\n",
      "|    policy_gradient_loss | 0.0268     |\n",
      "|    std                  | 0.095      |\n",
      "|    value_loss           | 1.44e+03   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 310        |\n",
      "|    ep_rew_mean          | 2.55e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 317        |\n",
      "|    iterations           | 18         |\n",
      "|    time_elapsed         | 116        |\n",
      "|    total_timesteps      | 36864      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.58269423 |\n",
      "|    clip_fraction        | 0.206      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.937      |\n",
      "|    explained_variance   | 0.939      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 212        |\n",
      "|    n_updates            | 1680       |\n",
      "|    policy_gradient_loss | 0.0866     |\n",
      "|    std                  | 0.0947     |\n",
      "|    value_loss           | 808        |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 298        |\n",
      "|    ep_rew_mean          | 2.41e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 317        |\n",
      "|    iterations           | 19         |\n",
      "|    time_elapsed         | 122        |\n",
      "|    total_timesteps      | 38912      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.35888126 |\n",
      "|    clip_fraction        | 0.0912     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.939      |\n",
      "|    explained_variance   | 0.823      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.96       |\n",
      "|    n_updates            | 1690       |\n",
      "|    policy_gradient_loss | 0.00961    |\n",
      "|    std                  | 0.0944     |\n",
      "|    value_loss           | 1.68e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 298       |\n",
      "|    ep_rew_mean          | 2.42e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 316       |\n",
      "|    iterations           | 20        |\n",
      "|    time_elapsed         | 129       |\n",
      "|    total_timesteps      | 40960     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.6453184 |\n",
      "|    clip_fraction        | 0.0872    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.946     |\n",
      "|    explained_variance   | 0.992     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 35.2      |\n",
      "|    n_updates            | 1700      |\n",
      "|    policy_gradient_loss | 0.0182    |\n",
      "|    std                  | 0.0936    |\n",
      "|    value_loss           | 357       |\n",
      "---------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 302          |\n",
      "|    ep_rew_mean          | 2.46e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 316          |\n",
      "|    iterations           | 21           |\n",
      "|    time_elapsed         | 135          |\n",
      "|    total_timesteps      | 43008        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0037407612 |\n",
      "|    clip_fraction        | 0.0677       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.961        |\n",
      "|    explained_variance   | -0.258       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 9.95         |\n",
      "|    n_updates            | 1710         |\n",
      "|    policy_gradient_loss | 0.012        |\n",
      "|    std                  | 0.0913       |\n",
      "|    value_loss           | 24.3         |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 294        |\n",
      "|    ep_rew_mean          | 2.38e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 316        |\n",
      "|    iterations           | 22         |\n",
      "|    time_elapsed         | 142        |\n",
      "|    total_timesteps      | 45056      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.45709142 |\n",
      "|    clip_fraction        | 0.0918     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.02       |\n",
      "|    explained_variance   | 0.865      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 2.31       |\n",
      "|    n_updates            | 1720       |\n",
      "|    policy_gradient_loss | -0.00335   |\n",
      "|    std                  | 0.0851     |\n",
      "|    value_loss           | 6.46       |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 294       |\n",
      "|    ep_rew_mean          | 2.38e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 315       |\n",
      "|    iterations           | 23        |\n",
      "|    time_elapsed         | 149       |\n",
      "|    total_timesteps      | 47104     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.2703952 |\n",
      "|    clip_fraction        | 0.0283    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.05      |\n",
      "|    explained_variance   | 0.993     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 55.9      |\n",
      "|    n_updates            | 1730      |\n",
      "|    policy_gradient_loss | 0.00601   |\n",
      "|    std                  | 0.0843    |\n",
      "|    value_loss           | 179       |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 298         |\n",
      "|    ep_rew_mean          | 2.42e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 315         |\n",
      "|    iterations           | 24          |\n",
      "|    time_elapsed         | 155         |\n",
      "|    total_timesteps      | 49152       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.039614804 |\n",
      "|    clip_fraction        | 0.0638      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.06        |\n",
      "|    explained_variance   | -8.36       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.68        |\n",
      "|    n_updates            | 1740        |\n",
      "|    policy_gradient_loss | 0.0139      |\n",
      "|    std                  | 0.0838      |\n",
      "|    value_loss           | 289         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 294        |\n",
      "|    ep_rew_mean          | 2.35e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 315        |\n",
      "|    iterations           | 25         |\n",
      "|    time_elapsed         | 162        |\n",
      "|    total_timesteps      | 51200      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.06915325 |\n",
      "|    clip_fraction        | 0.087      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.07       |\n",
      "|    explained_variance   | -1.05      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 1.35       |\n",
      "|    n_updates            | 1750       |\n",
      "|    policy_gradient_loss | 0.019      |\n",
      "|    std                  | 0.0828     |\n",
      "|    value_loss           | 85.9       |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 294        |\n",
      "|    ep_rew_mean          | 2.35e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 315        |\n",
      "|    iterations           | 26         |\n",
      "|    time_elapsed         | 168        |\n",
      "|    total_timesteps      | 53248      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.22423156 |\n",
      "|    clip_fraction        | 0.277      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.08       |\n",
      "|    explained_variance   | 0.566      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 11         |\n",
      "|    n_updates            | 1760       |\n",
      "|    policy_gradient_loss | 0.0197     |\n",
      "|    std                  | 0.0812     |\n",
      "|    value_loss           | 4.03e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 306         |\n",
      "|    ep_rew_mean          | 2.49e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 27          |\n",
      "|    time_elapsed         | 175         |\n",
      "|    total_timesteps      | 55296       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.024422072 |\n",
      "|    clip_fraction        | 0.0618      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.09        |\n",
      "|    explained_variance   | 0.321       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 107         |\n",
      "|    n_updates            | 1770        |\n",
      "|    policy_gradient_loss | 0.0243      |\n",
      "|    std                  | 0.0809      |\n",
      "|    value_loss           | 4.14e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 302         |\n",
      "|    ep_rew_mean          | 2.45e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 28          |\n",
      "|    time_elapsed         | 182         |\n",
      "|    total_timesteps      | 57344       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.060482077 |\n",
      "|    clip_fraction        | 0.0386      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.11        |\n",
      "|    explained_variance   | -0.165      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 9.49        |\n",
      "|    n_updates            | 1780        |\n",
      "|    policy_gradient_loss | 0.00884     |\n",
      "|    std                  | 0.079       |\n",
      "|    value_loss           | 83.1        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 306          |\n",
      "|    ep_rew_mean          | 2.49e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 314          |\n",
      "|    iterations           | 29           |\n",
      "|    time_elapsed         | 188          |\n",
      "|    total_timesteps      | 59392        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0028887088 |\n",
      "|    clip_fraction        | 0.0354       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.12         |\n",
      "|    explained_variance   | 0.797        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 44.3         |\n",
      "|    n_updates            | 1790         |\n",
      "|    policy_gradient_loss | 0.00816      |\n",
      "|    std                  | 0.0787       |\n",
      "|    value_loss           | 3.1e+03      |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 310         |\n",
      "|    ep_rew_mean          | 2.54e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 30          |\n",
      "|    time_elapsed         | 195         |\n",
      "|    total_timesteps      | 61440       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.032506168 |\n",
      "|    clip_fraction        | 0.13        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.13        |\n",
      "|    explained_variance   | 0.988       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 6.78        |\n",
      "|    n_updates            | 1800        |\n",
      "|    policy_gradient_loss | 0.0256      |\n",
      "|    std                  | 0.0773      |\n",
      "|    value_loss           | 34.7        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 318         |\n",
      "|    ep_rew_mean          | 2.63e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 31          |\n",
      "|    time_elapsed         | 201         |\n",
      "|    total_timesteps      | 63488       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.118923195 |\n",
      "|    clip_fraction        | 0.109       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.17        |\n",
      "|    explained_variance   | 0.189       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 9.72        |\n",
      "|    n_updates            | 1810        |\n",
      "|    policy_gradient_loss | 0.00281     |\n",
      "|    std                  | 0.0738      |\n",
      "|    value_loss           | 7.75        |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 326        |\n",
      "|    ep_rew_mean          | 2.73e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 314        |\n",
      "|    iterations           | 32         |\n",
      "|    time_elapsed         | 208        |\n",
      "|    total_timesteps      | 65536      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.93962723 |\n",
      "|    clip_fraction        | 0.0701     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.19       |\n",
      "|    explained_variance   | 0.0722     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 5.13       |\n",
      "|    n_updates            | 1820       |\n",
      "|    policy_gradient_loss | 0.0106     |\n",
      "|    std                  | 0.0733     |\n",
      "|    value_loss           | 2.61e+03   |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 330        |\n",
      "|    ep_rew_mean          | 2.77e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 314        |\n",
      "|    iterations           | 33         |\n",
      "|    time_elapsed         | 215        |\n",
      "|    total_timesteps      | 67584      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.06436594 |\n",
      "|    clip_fraction        | 0.0774     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.2        |\n",
      "|    explained_variance   | -0.0496    |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 13.1       |\n",
      "|    n_updates            | 1830       |\n",
      "|    policy_gradient_loss | 0.0221     |\n",
      "|    std                  | 0.0731     |\n",
      "|    value_loss           | 93.5       |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 342        |\n",
      "|    ep_rew_mean          | 2.92e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 313        |\n",
      "|    iterations           | 34         |\n",
      "|    time_elapsed         | 221        |\n",
      "|    total_timesteps      | 69632      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.32471883 |\n",
      "|    clip_fraction        | 0.026      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.2        |\n",
      "|    explained_variance   | 0.777      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 12.5       |\n",
      "|    n_updates            | 1840       |\n",
      "|    policy_gradient_loss | 0.008      |\n",
      "|    std                  | 0.0731     |\n",
      "|    value_loss           | 4.41e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 350         |\n",
      "|    ep_rew_mean          | 3.02e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 313         |\n",
      "|    iterations           | 35          |\n",
      "|    time_elapsed         | 228         |\n",
      "|    total_timesteps      | 71680       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.020263312 |\n",
      "|    clip_fraction        | 0.127       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.21        |\n",
      "|    explained_variance   | 0.698       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.51        |\n",
      "|    n_updates            | 1850        |\n",
      "|    policy_gradient_loss | 0.00387     |\n",
      "|    std                  | 0.0715      |\n",
      "|    value_loss           | 11.1        |\n",
      "-----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 358      |\n",
      "|    ep_rew_mean          | 3.15e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 313      |\n",
      "|    iterations           | 36       |\n",
      "|    time_elapsed         | 235      |\n",
      "|    total_timesteps      | 73728    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 6.614031 |\n",
      "|    clip_fraction        | 0.0578   |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.24     |\n",
      "|    explained_variance   | 0.798    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 2.71     |\n",
      "|    n_updates            | 1860     |\n",
      "|    policy_gradient_loss | -0.0117  |\n",
      "|    std                  | 0.0682   |\n",
      "|    value_loss           | 5.55     |\n",
      "--------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 366        |\n",
      "|    ep_rew_mean          | 3.24e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 312        |\n",
      "|    iterations           | 37         |\n",
      "|    time_elapsed         | 242        |\n",
      "|    total_timesteps      | 75776      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.03647862 |\n",
      "|    clip_fraction        | 0.0646     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.28       |\n",
      "|    explained_variance   | 0.577      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 499        |\n",
      "|    n_updates            | 1870       |\n",
      "|    policy_gradient_loss | 0.00789    |\n",
      "|    std                  | 0.0669     |\n",
      "|    value_loss           | 2.07e+03   |\n",
      "----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 366      |\n",
      "|    ep_rew_mean          | 3.25e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 312      |\n",
      "|    iterations           | 38       |\n",
      "|    time_elapsed         | 249      |\n",
      "|    total_timesteps      | 77824    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 12.99461 |\n",
      "|    clip_fraction        | 0.069    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.29     |\n",
      "|    explained_variance   | 0.347    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 2.58     |\n",
      "|    n_updates            | 1880     |\n",
      "|    policy_gradient_loss | 0.0258   |\n",
      "|    std                  | 0.0663   |\n",
      "|    value_loss           | 59.9     |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 362       |\n",
      "|    ep_rew_mean          | 3.2e+03   |\n",
      "| time/                   |           |\n",
      "|    fps                  | 312       |\n",
      "|    iterations           | 39        |\n",
      "|    time_elapsed         | 255       |\n",
      "|    total_timesteps      | 79872     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.7791483 |\n",
      "|    clip_fraction        | 0.0972    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.3       |\n",
      "|    explained_variance   | -0.0506   |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.03      |\n",
      "|    n_updates            | 1890      |\n",
      "|    policy_gradient_loss | 0.0365    |\n",
      "|    std                  | 0.066     |\n",
      "|    value_loss           | 113       |\n",
      "---------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 362          |\n",
      "|    ep_rew_mean          | 3.21e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 312          |\n",
      "|    iterations           | 40           |\n",
      "|    time_elapsed         | 262          |\n",
      "|    total_timesteps      | 81920        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0033311557 |\n",
      "|    clip_fraction        | 0.0263       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.3          |\n",
      "|    explained_variance   | 0.57         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7e+03        |\n",
      "|    n_updates            | 1900         |\n",
      "|    policy_gradient_loss | 0.00727      |\n",
      "|    std                  | 0.0659       |\n",
      "|    value_loss           | 5.64e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 362        |\n",
      "|    ep_rew_mean          | 3.21e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 312        |\n",
      "|    iterations           | 41         |\n",
      "|    time_elapsed         | 269        |\n",
      "|    total_timesteps      | 83968      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.30596715 |\n",
      "|    clip_fraction        | 0.0576     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.3        |\n",
      "|    explained_variance   | 0.831      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 62.2       |\n",
      "|    n_updates            | 1910       |\n",
      "|    policy_gradient_loss | 0.0097     |\n",
      "|    std                  | 0.0658     |\n",
      "|    value_loss           | 4.11e+03   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 358        |\n",
      "|    ep_rew_mean          | 3.17e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 312        |\n",
      "|    iterations           | 42         |\n",
      "|    time_elapsed         | 275        |\n",
      "|    total_timesteps      | 86016      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02020832 |\n",
      "|    clip_fraction        | 0.0633     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.3        |\n",
      "|    explained_variance   | -1.77      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 6.67       |\n",
      "|    n_updates            | 1920       |\n",
      "|    policy_gradient_loss | 0.0294     |\n",
      "|    std                  | 0.0656     |\n",
      "|    value_loss           | 313        |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 366        |\n",
      "|    ep_rew_mean          | 3.29e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 311        |\n",
      "|    iterations           | 43         |\n",
      "|    time_elapsed         | 282        |\n",
      "|    total_timesteps      | 88064      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.73134506 |\n",
      "|    clip_fraction        | 0.148      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.31       |\n",
      "|    explained_variance   | 0.472      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 115        |\n",
      "|    n_updates            | 1930       |\n",
      "|    policy_gradient_loss | 0.0374     |\n",
      "|    std                  | 0.0652     |\n",
      "|    value_loss           | 5.65e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 374       |\n",
      "|    ep_rew_mean          | 3.39e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 311       |\n",
      "|    iterations           | 44        |\n",
      "|    time_elapsed         | 289       |\n",
      "|    total_timesteps      | 90112     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.3605379 |\n",
      "|    clip_fraction        | 0.168     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.33      |\n",
      "|    explained_variance   | 0.846     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.24      |\n",
      "|    n_updates            | 1940      |\n",
      "|    policy_gradient_loss | 0.0245    |\n",
      "|    std                  | 0.0627    |\n",
      "|    value_loss           | 8.89      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 374       |\n",
      "|    ep_rew_mean          | 3.39e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 311       |\n",
      "|    iterations           | 45        |\n",
      "|    time_elapsed         | 295       |\n",
      "|    total_timesteps      | 92160     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.8682101 |\n",
      "|    clip_fraction        | 0.177     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.36      |\n",
      "|    explained_variance   | 0.144     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.44      |\n",
      "|    n_updates            | 1950      |\n",
      "|    policy_gradient_loss | 0.0145    |\n",
      "|    std                  | 0.0621    |\n",
      "|    value_loss           | 39.8      |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 374         |\n",
      "|    ep_rew_mean          | 3.39e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 311         |\n",
      "|    iterations           | 46          |\n",
      "|    time_elapsed         | 302         |\n",
      "|    total_timesteps      | 94208       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.014522335 |\n",
      "|    clip_fraction        | 0.0893      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.36        |\n",
      "|    explained_variance   | 0.713       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 14.4        |\n",
      "|    n_updates            | 1960        |\n",
      "|    policy_gradient_loss | 0.0213      |\n",
      "|    std                  | 0.0618      |\n",
      "|    value_loss           | 23.9        |\n",
      "-----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 374      |\n",
      "|    ep_rew_mean          | 3.4e+03  |\n",
      "| time/                   |          |\n",
      "|    fps                  | 311      |\n",
      "|    iterations           | 47       |\n",
      "|    time_elapsed         | 308      |\n",
      "|    total_timesteps      | 96256    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 32.75107 |\n",
      "|    clip_fraction        | 0.111    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.39     |\n",
      "|    explained_variance   | 0.781    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 1.6      |\n",
      "|    n_updates            | 1970     |\n",
      "|    policy_gradient_loss | 0.044    |\n",
      "|    std                  | 0.0591   |\n",
      "|    value_loss           | 5.86     |\n",
      "--------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 379        |\n",
      "|    ep_rew_mean          | 3.46e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 312        |\n",
      "|    iterations           | 48         |\n",
      "|    time_elapsed         | 315        |\n",
      "|    total_timesteps      | 98304      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.18702693 |\n",
      "|    clip_fraction        | 0.0294     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.41       |\n",
      "|    explained_variance   | 0.175      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 21         |\n",
      "|    n_updates            | 1980       |\n",
      "|    policy_gradient_loss | 0.017      |\n",
      "|    std                  | 0.0589     |\n",
      "|    value_loss           | 3.5e+03    |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 379         |\n",
      "|    ep_rew_mean          | 3.46e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 311         |\n",
      "|    iterations           | 49          |\n",
      "|    time_elapsed         | 321         |\n",
      "|    total_timesteps      | 100352      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.053726576 |\n",
      "|    clip_fraction        | 0.0484      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.42        |\n",
      "|    explained_variance   | 0.314       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.13        |\n",
      "|    n_updates            | 1990        |\n",
      "|    policy_gradient_loss | 0.0143      |\n",
      "|    std                  | 0.058       |\n",
      "|    value_loss           | 89          |\n",
      "-----------------------------------------\n",
      "--- 324.5041244029999 seconds ---\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.95\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 410      |\n",
      "|    ep_rew_mean     | 3.65e+03 |\n",
      "| time/              |          |\n",
      "|    fps             | 481      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 4        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 410       |\n",
      "|    ep_rew_mean          | 3.62e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 372       |\n",
      "|    iterations           | 2         |\n",
      "|    time_elapsed         | 10        |\n",
      "|    total_timesteps      | 4096      |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.5779338 |\n",
      "|    clip_fraction        | 0.202     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.696     |\n",
      "|    explained_variance   | -0.529    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 58.5      |\n",
      "|    n_updates            | 1520      |\n",
      "|    policy_gradient_loss | 0.0276    |\n",
      "|    std                  | 0.12      |\n",
      "|    value_loss           | 63.3      |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 190      |\n",
      "|    ep_rew_mean          | 1.06e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 346      |\n",
      "|    iterations           | 3        |\n",
      "|    time_elapsed         | 17       |\n",
      "|    total_timesteps      | 6144     |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 71.04733 |\n",
      "|    clip_fraction        | 0.668    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.706    |\n",
      "|    explained_variance   | -0.629   |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 8.73     |\n",
      "|    n_updates            | 1530     |\n",
      "|    policy_gradient_loss | 0.0988   |\n",
      "|    std                  | 0.119    |\n",
      "|    value_loss           | 72.5     |\n",
      "--------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 139         |\n",
      "|    ep_rew_mean          | 499         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 335         |\n",
      "|    iterations           | 4           |\n",
      "|    time_elapsed         | 24          |\n",
      "|    total_timesteps      | 8192        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009115995 |\n",
      "|    clip_fraction        | 0.153       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.706       |\n",
      "|    explained_variance   | 0.00879     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.14e+03    |\n",
      "|    n_updates            | 1540        |\n",
      "|    policy_gradient_loss | 0.0229      |\n",
      "|    std                  | 0.119       |\n",
      "|    value_loss           | 1.8e+04     |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 120         |\n",
      "|    ep_rew_mean          | 297         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 331         |\n",
      "|    iterations           | 5           |\n",
      "|    time_elapsed         | 30          |\n",
      "|    total_timesteps      | 10240       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004702159 |\n",
      "|    clip_fraction        | 0.0668      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.706       |\n",
      "|    explained_variance   | 0.898       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 253         |\n",
      "|    n_updates            | 1550        |\n",
      "|    policy_gradient_loss | 0.00149     |\n",
      "|    std                  | 0.119       |\n",
      "|    value_loss           | 2.92e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 93.6        |\n",
      "|    ep_rew_mean          | -7.92       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 326         |\n",
      "|    iterations           | 6           |\n",
      "|    time_elapsed         | 37          |\n",
      "|    total_timesteps      | 12288       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.012154237 |\n",
      "|    clip_fraction        | 0.136       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.706       |\n",
      "|    explained_variance   | 0.964       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 207         |\n",
      "|    n_updates            | 1560        |\n",
      "|    policy_gradient_loss | 0.02        |\n",
      "|    std                  | 0.119       |\n",
      "|    value_loss           | 2.14e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 82.4        |\n",
      "|    ep_rew_mean          | -123        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 323         |\n",
      "|    iterations           | 7           |\n",
      "|    time_elapsed         | 44          |\n",
      "|    total_timesteps      | 14336       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.041813314 |\n",
      "|    clip_fraction        | 0.171       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.706       |\n",
      "|    explained_variance   | 0.979       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 190         |\n",
      "|    n_updates            | 1570        |\n",
      "|    policy_gradient_loss | 0.0211      |\n",
      "|    std                  | 0.119       |\n",
      "|    value_loss           | 1.38e+03    |\n",
      "-----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 81.2     |\n",
      "|    ep_rew_mean          | -136     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 321      |\n",
      "|    iterations           | 8        |\n",
      "|    time_elapsed         | 50       |\n",
      "|    total_timesteps      | 16384    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 0.545141 |\n",
      "|    clip_fraction        | 0.176    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.706    |\n",
      "|    explained_variance   | 0.988    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 325      |\n",
      "|    n_updates            | 1580     |\n",
      "|    policy_gradient_loss | 0.029    |\n",
      "|    std                  | 0.119    |\n",
      "|    value_loss           | 811      |\n",
      "--------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 83          |\n",
      "|    ep_rew_mean          | -121        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 319         |\n",
      "|    iterations           | 9           |\n",
      "|    time_elapsed         | 57          |\n",
      "|    total_timesteps      | 18432       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.019917626 |\n",
      "|    clip_fraction        | 0.196       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.706       |\n",
      "|    explained_variance   | 0.993       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 164         |\n",
      "|    n_updates            | 1590        |\n",
      "|    policy_gradient_loss | 0.0155      |\n",
      "|    std                  | 0.119       |\n",
      "|    value_loss           | 393         |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 76        |\n",
      "|    ep_rew_mean          | -188      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 318       |\n",
      "|    iterations           | 10        |\n",
      "|    time_elapsed         | 64        |\n",
      "|    total_timesteps      | 20480     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.4874063 |\n",
      "|    clip_fraction        | 0.153     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.707     |\n",
      "|    explained_variance   | 0.992     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 133       |\n",
      "|    n_updates            | 1600      |\n",
      "|    policy_gradient_loss | 0.0123    |\n",
      "|    std                  | 0.119     |\n",
      "|    value_loss           | 353       |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 78.7       |\n",
      "|    ep_rew_mean          | -162       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 318        |\n",
      "|    iterations           | 11         |\n",
      "|    time_elapsed         | 70         |\n",
      "|    total_timesteps      | 22528      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.44502038 |\n",
      "|    clip_fraction        | 0.234      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.709      |\n",
      "|    explained_variance   | 0.994      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 58.6       |\n",
      "|    n_updates            | 1610       |\n",
      "|    policy_gradient_loss | 0.00783    |\n",
      "|    std                  | 0.119      |\n",
      "|    value_loss           | 210        |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 82.8       |\n",
      "|    ep_rew_mean          | -129       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 317        |\n",
      "|    iterations           | 12         |\n",
      "|    time_elapsed         | 77         |\n",
      "|    total_timesteps      | 24576      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.31567407 |\n",
      "|    clip_fraction        | 0.157      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.712      |\n",
      "|    explained_variance   | 0.997      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 51         |\n",
      "|    n_updates            | 1620       |\n",
      "|    policy_gradient_loss | 0.0125     |\n",
      "|    std                  | 0.118      |\n",
      "|    value_loss           | 134        |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 71.6      |\n",
      "|    ep_rew_mean          | -230      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 316       |\n",
      "|    iterations           | 13        |\n",
      "|    time_elapsed         | 83        |\n",
      "|    total_timesteps      | 26624     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 57.555008 |\n",
      "|    clip_fraction        | 0.572     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.715     |\n",
      "|    explained_variance   | 0.998     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 59.4      |\n",
      "|    n_updates            | 1630      |\n",
      "|    policy_gradient_loss | 0.169     |\n",
      "|    std                  | 0.118     |\n",
      "|    value_loss           | 191       |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 54.1     |\n",
      "|    ep_rew_mean          | -397     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 316      |\n",
      "|    iterations           | 14       |\n",
      "|    time_elapsed         | 90       |\n",
      "|    total_timesteps      | 28672    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 5.661479 |\n",
      "|    clip_fraction        | 0.408    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.712    |\n",
      "|    explained_variance   | 0.978    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 165      |\n",
      "|    n_updates            | 1640     |\n",
      "|    policy_gradient_loss | 0.124    |\n",
      "|    std                  | 0.119    |\n",
      "|    value_loss           | 1.17e+03 |\n",
      "--------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 44.4       |\n",
      "|    ep_rew_mean          | -493       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 316        |\n",
      "|    iterations           | 15         |\n",
      "|    time_elapsed         | 97         |\n",
      "|    total_timesteps      | 30720      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.46514696 |\n",
      "|    clip_fraction        | 0.345      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.71       |\n",
      "|    explained_variance   | 0.957      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 317        |\n",
      "|    n_updates            | 1650       |\n",
      "|    policy_gradient_loss | 0.0821     |\n",
      "|    std                  | 0.119      |\n",
      "|    value_loss           | 3.46e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 51.6      |\n",
      "|    ep_rew_mean          | -418      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 315       |\n",
      "|    iterations           | 16        |\n",
      "|    time_elapsed         | 103       |\n",
      "|    total_timesteps      | 32768     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 5.4830613 |\n",
      "|    clip_fraction        | 0.362     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.708     |\n",
      "|    explained_variance   | 0.966     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 113       |\n",
      "|    n_updates            | 1660      |\n",
      "|    policy_gradient_loss | 0.082     |\n",
      "|    std                  | 0.12      |\n",
      "|    value_loss           | 3.13e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 53        |\n",
      "|    ep_rew_mean          | -422      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 314       |\n",
      "|    iterations           | 17        |\n",
      "|    time_elapsed         | 110       |\n",
      "|    total_timesteps      | 34816     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.3725002 |\n",
      "|    clip_fraction        | 0.236     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.707     |\n",
      "|    explained_variance   | 0.994     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 45.6      |\n",
      "|    n_updates            | 1670      |\n",
      "|    policy_gradient_loss | 0.0148    |\n",
      "|    std                  | 0.119     |\n",
      "|    value_loss           | 131       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 49.5      |\n",
      "|    ep_rew_mean          | -426      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 314       |\n",
      "|    iterations           | 18        |\n",
      "|    time_elapsed         | 117       |\n",
      "|    total_timesteps      | 36864     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 10.992207 |\n",
      "|    clip_fraction        | 0.628     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.716     |\n",
      "|    explained_variance   | 0.768     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 116       |\n",
      "|    n_updates            | 1680      |\n",
      "|    policy_gradient_loss | 0.0948    |\n",
      "|    std                  | 0.118     |\n",
      "|    value_loss           | 5.18e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 54.7       |\n",
      "|    ep_rew_mean          | -376       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 314        |\n",
      "|    iterations           | 19         |\n",
      "|    time_elapsed         | 123        |\n",
      "|    total_timesteps      | 38912      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.03897142 |\n",
      "|    clip_fraction        | 0.142      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.719      |\n",
      "|    explained_variance   | 0.907      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 1.26e+03   |\n",
      "|    n_updates            | 1690       |\n",
      "|    policy_gradient_loss | 0.0435     |\n",
      "|    std                  | 0.118      |\n",
      "|    value_loss           | 2.88e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 53.3      |\n",
      "|    ep_rew_mean          | -411      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 313       |\n",
      "|    iterations           | 20        |\n",
      "|    time_elapsed         | 130       |\n",
      "|    total_timesteps      | 40960     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.0264908 |\n",
      "|    clip_fraction        | 0.148     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.719     |\n",
      "|    explained_variance   | 0.878     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 519       |\n",
      "|    n_updates            | 1700      |\n",
      "|    policy_gradient_loss | 0.00699   |\n",
      "|    std                  | 0.118     |\n",
      "|    value_loss           | 2.72e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 60.8       |\n",
      "|    ep_rew_mean          | -349       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 313        |\n",
      "|    iterations           | 21         |\n",
      "|    time_elapsed         | 137        |\n",
      "|    total_timesteps      | 43008      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.13725969 |\n",
      "|    clip_fraction        | 0.228      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.72       |\n",
      "|    explained_variance   | 0.89       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 921        |\n",
      "|    n_updates            | 1710       |\n",
      "|    policy_gradient_loss | 0.0283     |\n",
      "|    std                  | 0.118      |\n",
      "|    value_loss           | 6.56e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 54.1        |\n",
      "|    ep_rew_mean          | -394        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 313         |\n",
      "|    iterations           | 22          |\n",
      "|    time_elapsed         | 143         |\n",
      "|    total_timesteps      | 45056       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.019055914 |\n",
      "|    clip_fraction        | 0.132       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.718       |\n",
      "|    explained_variance   | 0.954       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 725         |\n",
      "|    n_updates            | 1720        |\n",
      "|    policy_gradient_loss | 0.0268      |\n",
      "|    std                  | 0.118       |\n",
      "|    value_loss           | 2.43e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 60.2        |\n",
      "|    ep_rew_mean          | -308        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 313         |\n",
      "|    iterations           | 23          |\n",
      "|    time_elapsed         | 150         |\n",
      "|    total_timesteps      | 47104       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.013945863 |\n",
      "|    clip_fraction        | 0.132       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.716       |\n",
      "|    explained_variance   | 0.987       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 566         |\n",
      "|    n_updates            | 1730        |\n",
      "|    policy_gradient_loss | 0.00924     |\n",
      "|    std                  | 0.118       |\n",
      "|    value_loss           | 1.35e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 58.3       |\n",
      "|    ep_rew_mean          | -324       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 313        |\n",
      "|    iterations           | 24         |\n",
      "|    time_elapsed         | 156        |\n",
      "|    total_timesteps      | 49152      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.61186635 |\n",
      "|    clip_fraction        | 0.164      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.715      |\n",
      "|    explained_variance   | 0.989      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 722        |\n",
      "|    n_updates            | 1740       |\n",
      "|    policy_gradient_loss | 0.0204     |\n",
      "|    std                  | 0.118      |\n",
      "|    value_loss           | 1.1e+03    |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 68.1        |\n",
      "|    ep_rew_mean          | -263        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 312         |\n",
      "|    iterations           | 25          |\n",
      "|    time_elapsed         | 163         |\n",
      "|    total_timesteps      | 51200       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.018117972 |\n",
      "|    clip_fraction        | 0.124       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.715       |\n",
      "|    explained_variance   | 0.987       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 155         |\n",
      "|    n_updates            | 1750        |\n",
      "|    policy_gradient_loss | 0.00424     |\n",
      "|    std                  | 0.118       |\n",
      "|    value_loss           | 983         |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 70.9      |\n",
      "|    ep_rew_mean          | -256      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 312       |\n",
      "|    iterations           | 26        |\n",
      "|    time_elapsed         | 170       |\n",
      "|    total_timesteps      | 53248     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.7133713 |\n",
      "|    clip_fraction        | 0.269     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.714     |\n",
      "|    explained_variance   | 0.962     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 5.44e+03  |\n",
      "|    n_updates            | 1760      |\n",
      "|    policy_gradient_loss | 0.0477    |\n",
      "|    std                  | 0.119     |\n",
      "|    value_loss           | 4.63e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 75.8      |\n",
      "|    ep_rew_mean          | -233      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 312       |\n",
      "|    iterations           | 27        |\n",
      "|    time_elapsed         | 177       |\n",
      "|    total_timesteps      | 55296     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3.4559422 |\n",
      "|    clip_fraction        | 0.229     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.714     |\n",
      "|    explained_variance   | 0.979     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 27.9      |\n",
      "|    n_updates            | 1770      |\n",
      "|    policy_gradient_loss | 0.0252    |\n",
      "|    std                  | 0.118     |\n",
      "|    value_loss           | 2.62e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 62       |\n",
      "|    ep_rew_mean          | -346     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 312      |\n",
      "|    iterations           | 28       |\n",
      "|    time_elapsed         | 183      |\n",
      "|    total_timesteps      | 57344    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 122.5972 |\n",
      "|    clip_fraction        | 0.964    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.716    |\n",
      "|    explained_variance   | 0.968    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 49.7     |\n",
      "|    n_updates            | 1780     |\n",
      "|    policy_gradient_loss | 0.0326   |\n",
      "|    std                  | 0.118    |\n",
      "|    value_loss           | 731      |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 43.7      |\n",
      "|    ep_rew_mean          | -507      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 312       |\n",
      "|    iterations           | 29        |\n",
      "|    time_elapsed         | 190       |\n",
      "|    total_timesteps      | 59392     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.9138285 |\n",
      "|    clip_fraction        | 0.201     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.716     |\n",
      "|    explained_variance   | 0.844     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 798       |\n",
      "|    n_updates            | 1790      |\n",
      "|    policy_gradient_loss | 0.0209    |\n",
      "|    std                  | 0.118     |\n",
      "|    value_loss           | 5.35e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 35.1      |\n",
      "|    ep_rew_mean          | -616      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 312       |\n",
      "|    iterations           | 30        |\n",
      "|    time_elapsed         | 196       |\n",
      "|    total_timesteps      | 61440     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 90.323524 |\n",
      "|    clip_fraction        | 0.597     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.713     |\n",
      "|    explained_variance   | 0.943     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 238       |\n",
      "|    n_updates            | 1800      |\n",
      "|    policy_gradient_loss | 0.206     |\n",
      "|    std                  | 0.119     |\n",
      "|    value_loss           | 3.38e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 35.4       |\n",
      "|    ep_rew_mean          | -622       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 311        |\n",
      "|    iterations           | 31         |\n",
      "|    time_elapsed         | 203        |\n",
      "|    total_timesteps      | 63488      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.36873326 |\n",
      "|    clip_fraction        | 0.431      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.709      |\n",
      "|    explained_variance   | 0.869      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 1.11e+03   |\n",
      "|    n_updates            | 1810       |\n",
      "|    policy_gradient_loss | 0.0818     |\n",
      "|    std                  | 0.119      |\n",
      "|    value_loss           | 6.05e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 37        |\n",
      "|    ep_rew_mean          | -595      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 311       |\n",
      "|    iterations           | 32        |\n",
      "|    time_elapsed         | 210       |\n",
      "|    total_timesteps      | 65536     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3.8648658 |\n",
      "|    clip_fraction        | 0.445     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.708     |\n",
      "|    explained_variance   | 0.896     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 129       |\n",
      "|    n_updates            | 1820      |\n",
      "|    policy_gradient_loss | 0.0881    |\n",
      "|    std                  | 0.119     |\n",
      "|    value_loss           | 3.39e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 32.5       |\n",
      "|    ep_rew_mean          | -633       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 311        |\n",
      "|    iterations           | 33         |\n",
      "|    time_elapsed         | 216        |\n",
      "|    total_timesteps      | 67584      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.53007936 |\n",
      "|    clip_fraction        | 0.294      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.708      |\n",
      "|    explained_variance   | 0.933      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 212        |\n",
      "|    n_updates            | 1830       |\n",
      "|    policy_gradient_loss | 0.0448     |\n",
      "|    std                  | 0.119      |\n",
      "|    value_loss           | 4.3e+03    |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 33.3      |\n",
      "|    ep_rew_mean          | -606      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 311       |\n",
      "|    iterations           | 34        |\n",
      "|    time_elapsed         | 223       |\n",
      "|    total_timesteps      | 69632     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.7086227 |\n",
      "|    clip_fraction        | 0.182     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.711     |\n",
      "|    explained_variance   | 0.964     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 173       |\n",
      "|    n_updates            | 1840      |\n",
      "|    policy_gradient_loss | -0.00359  |\n",
      "|    std                  | 0.119     |\n",
      "|    value_loss           | 997       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 32.1      |\n",
      "|    ep_rew_mean          | -626      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 311       |\n",
      "|    iterations           | 35        |\n",
      "|    time_elapsed         | 230       |\n",
      "|    total_timesteps      | 71680     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3.8877957 |\n",
      "|    clip_fraction        | 0.519     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.712     |\n",
      "|    explained_variance   | 0.955     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 41.7      |\n",
      "|    n_updates            | 1850      |\n",
      "|    policy_gradient_loss | 0.122     |\n",
      "|    std                  | 0.119     |\n",
      "|    value_loss           | 2.3e+03   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 32.4      |\n",
      "|    ep_rew_mean          | -623      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 311       |\n",
      "|    iterations           | 36        |\n",
      "|    time_elapsed         | 236       |\n",
      "|    total_timesteps      | 73728     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 43.759644 |\n",
      "|    clip_fraction        | 0.287     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.71      |\n",
      "|    explained_variance   | 0.958     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 58.2      |\n",
      "|    n_updates            | 1860      |\n",
      "|    policy_gradient_loss | 0.0372    |\n",
      "|    std                  | 0.119     |\n",
      "|    value_loss           | 352       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 42.6      |\n",
      "|    ep_rew_mean          | -516      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 311       |\n",
      "|    iterations           | 37        |\n",
      "|    time_elapsed         | 243       |\n",
      "|    total_timesteps      | 75776     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 163.51965 |\n",
      "|    clip_fraction        | 0.635     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.713     |\n",
      "|    explained_variance   | 0.862     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 34        |\n",
      "|    n_updates            | 1870      |\n",
      "|    policy_gradient_loss | 0.11      |\n",
      "|    std                  | 0.118     |\n",
      "|    value_loss           | 3.3e+03   |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 57.7     |\n",
      "|    ep_rew_mean          | -394     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 311      |\n",
      "|    iterations           | 38       |\n",
      "|    time_elapsed         | 250      |\n",
      "|    total_timesteps      | 77824    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 116.8294 |\n",
      "|    clip_fraction        | 0.704    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.714    |\n",
      "|    explained_variance   | 0.99     |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 29.3     |\n",
      "|    n_updates            | 1880     |\n",
      "|    policy_gradient_loss | 0.253    |\n",
      "|    std                  | 0.119    |\n",
      "|    value_loss           | 439      |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 69.5      |\n",
      "|    ep_rew_mean          | -295      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 311       |\n",
      "|    iterations           | 39        |\n",
      "|    time_elapsed         | 256       |\n",
      "|    total_timesteps      | 79872     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 134.14558 |\n",
      "|    clip_fraction        | 0.743     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.709     |\n",
      "|    explained_variance   | 0.982     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 242       |\n",
      "|    n_updates            | 1890      |\n",
      "|    policy_gradient_loss | 0.301     |\n",
      "|    std                  | 0.119     |\n",
      "|    value_loss           | 1.39e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 77.9     |\n",
      "|    ep_rew_mean          | -226     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 311      |\n",
      "|    iterations           | 40       |\n",
      "|    time_elapsed         | 263      |\n",
      "|    total_timesteps      | 81920    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 8.968195 |\n",
      "|    clip_fraction        | 0.751    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.705    |\n",
      "|    explained_variance   | 0.875    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 33.9     |\n",
      "|    n_updates            | 1900     |\n",
      "|    policy_gradient_loss | 0.103    |\n",
      "|    std                  | 0.12     |\n",
      "|    value_loss           | 6.59e+03 |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 41.7     |\n",
      "|    ep_rew_mean          | -536     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 311      |\n",
      "|    iterations           | 41       |\n",
      "|    time_elapsed         | 269      |\n",
      "|    total_timesteps      | 83968    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 434.5105 |\n",
      "|    clip_fraction        | 0.926    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.705    |\n",
      "|    explained_variance   | 0.991    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 70.7     |\n",
      "|    n_updates            | 1910     |\n",
      "|    policy_gradient_loss | 0.25     |\n",
      "|    std                  | 0.119    |\n",
      "|    value_loss           | 247      |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 23.8      |\n",
      "|    ep_rew_mean          | -740      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 311       |\n",
      "|    iterations           | 42        |\n",
      "|    time_elapsed         | 276       |\n",
      "|    total_timesteps      | 86016     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 139.23834 |\n",
      "|    clip_fraction        | 0.742     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.706     |\n",
      "|    explained_variance   | 0.87      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 62.6      |\n",
      "|    n_updates            | 1920      |\n",
      "|    policy_gradient_loss | 0.223     |\n",
      "|    std                  | 0.119     |\n",
      "|    value_loss           | 3.4e+03   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.9      |\n",
      "|    ep_rew_mean          | -799      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 310       |\n",
      "|    iterations           | 43        |\n",
      "|    time_elapsed         | 283       |\n",
      "|    total_timesteps      | 88064     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 164.94965 |\n",
      "|    clip_fraction        | 0.554     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.706     |\n",
      "|    explained_variance   | 0.821     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 160       |\n",
      "|    n_updates            | 1930      |\n",
      "|    policy_gradient_loss | 0.179     |\n",
      "|    std                  | 0.12      |\n",
      "|    value_loss           | 7.69e+03  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 14.7      |\n",
      "|    ep_rew_mean          | -818      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 309       |\n",
      "|    iterations           | 44        |\n",
      "|    time_elapsed         | 291       |\n",
      "|    total_timesteps      | 90112     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 134.91154 |\n",
      "|    clip_fraction        | 0.795     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.705     |\n",
      "|    explained_variance   | 0.0588    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 81.8      |\n",
      "|    n_updates            | 1940      |\n",
      "|    policy_gradient_loss | 0.285     |\n",
      "|    std                  | 0.119     |\n",
      "|    value_loss           | 4.25e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16        |\n",
      "|    ep_rew_mean          | -800      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 305       |\n",
      "|    iterations           | 45        |\n",
      "|    time_elapsed         | 301       |\n",
      "|    total_timesteps      | 92160     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 121.56989 |\n",
      "|    clip_fraction        | 0.872     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.704     |\n",
      "|    explained_variance   | 0.142     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 105       |\n",
      "|    n_updates            | 1950      |\n",
      "|    policy_gradient_loss | 0.324     |\n",
      "|    std                  | 0.12      |\n",
      "|    value_loss           | 2.6e+03   |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 17.7     |\n",
      "|    ep_rew_mean          | -805     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 304      |\n",
      "|    iterations           | 46       |\n",
      "|    time_elapsed         | 309      |\n",
      "|    total_timesteps      | 94208    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 54.14022 |\n",
      "|    clip_fraction        | 0.78     |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.702    |\n",
      "|    explained_variance   | 0.756    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 52.9     |\n",
      "|    n_updates            | 1960     |\n",
      "|    policy_gradient_loss | 0.136    |\n",
      "|    std                  | 0.12     |\n",
      "|    value_loss           | 1.43e+03 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 15.9      |\n",
      "|    ep_rew_mean          | -814      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 304       |\n",
      "|    iterations           | 47        |\n",
      "|    time_elapsed         | 316       |\n",
      "|    total_timesteps      | 96256     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 189.97443 |\n",
      "|    clip_fraction        | 0.929     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.7       |\n",
      "|    explained_variance   | 0.728     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 239       |\n",
      "|    n_updates            | 1970      |\n",
      "|    policy_gradient_loss | 0.171     |\n",
      "|    std                  | 0.12      |\n",
      "|    value_loss           | 2.16e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.8      |\n",
      "|    ep_rew_mean          | -863      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 304       |\n",
      "|    iterations           | 48        |\n",
      "|    time_elapsed         | 322       |\n",
      "|    total_timesteps      | 98304     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 29.299284 |\n",
      "|    clip_fraction        | 0.722     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.698     |\n",
      "|    explained_variance   | 0.721     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 7.01e+03  |\n",
      "|    n_updates            | 1980      |\n",
      "|    policy_gradient_loss | 0.145     |\n",
      "|    std                  | 0.121     |\n",
      "|    value_loss           | 2.24e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 15.7     |\n",
      "|    ep_rew_mean          | -833     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 303      |\n",
      "|    iterations           | 49       |\n",
      "|    time_elapsed         | 330      |\n",
      "|    total_timesteps      | 100352   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 31.63016 |\n",
      "|    clip_fraction        | 0.806    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.695    |\n",
      "|    explained_variance   | 0.567    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 43.6     |\n",
      "|    n_updates            | 1990     |\n",
      "|    policy_gradient_loss | 0.215    |\n",
      "|    std                  | 0.121    |\n",
      "|    value_loss           | 3.84e+03 |\n",
      "--------------------------------------\n",
      "--- 334.6348261833191 seconds ---\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 1.0\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 332      |\n",
      "|    ep_rew_mean     | 2.74e+03 |\n",
      "| time/              |          |\n",
      "|    fps             | 209      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 9        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 371         |\n",
      "|    ep_rew_mean          | 3.19e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 201         |\n",
      "|    iterations           | 2           |\n",
      "|    time_elapsed         | 20          |\n",
      "|    total_timesteps      | 4096        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009935738 |\n",
      "|    clip_fraction        | 0.0398      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.692       |\n",
      "|    explained_variance   | 0.564       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.56e+03    |\n",
      "|    n_updates            | 1520        |\n",
      "|    policy_gradient_loss | -0.00193    |\n",
      "|    std                  | 0.121       |\n",
      "|    value_loss           | 3.91e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 361         |\n",
      "|    ep_rew_mean          | 3.09e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 227         |\n",
      "|    iterations           | 3           |\n",
      "|    time_elapsed         | 26          |\n",
      "|    total_timesteps      | 6144        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.020685554 |\n",
      "|    clip_fraction        | 0.108       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.7         |\n",
      "|    explained_variance   | 0.134       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 7.1         |\n",
      "|    n_updates            | 1530        |\n",
      "|    policy_gradient_loss | 0.0153      |\n",
      "|    std                  | 0.12        |\n",
      "|    value_loss           | 37.4        |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 341        |\n",
      "|    ep_rew_mean          | 2.88e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 204        |\n",
      "|    iterations           | 4          |\n",
      "|    time_elapsed         | 39         |\n",
      "|    total_timesteps      | 8192       |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.16227922 |\n",
      "|    clip_fraction        | 0.126      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.707      |\n",
      "|    explained_variance   | 0.923      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 6.41       |\n",
      "|    n_updates            | 1540       |\n",
      "|    policy_gradient_loss | 0.00577    |\n",
      "|    std                  | 0.119      |\n",
      "|    value_loss           | 447        |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 354        |\n",
      "|    ep_rew_mean          | 3.01e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 181        |\n",
      "|    iterations           | 5          |\n",
      "|    time_elapsed         | 56         |\n",
      "|    total_timesteps      | 10240      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.07554555 |\n",
      "|    clip_fraction        | 0.0652     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.71       |\n",
      "|    explained_variance   | 0.82       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 18.4       |\n",
      "|    n_updates            | 1550       |\n",
      "|    policy_gradient_loss | 0.0358     |\n",
      "|    std                  | 0.119      |\n",
      "|    value_loss           | 683        |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 352        |\n",
      "|    ep_rew_mean          | 2.99e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 184        |\n",
      "|    iterations           | 6          |\n",
      "|    time_elapsed         | 66         |\n",
      "|    total_timesteps      | 12288      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.37925747 |\n",
      "|    clip_fraction        | 0.18       |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.723      |\n",
      "|    explained_variance   | 0.453      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 15.5       |\n",
      "|    n_updates            | 1560       |\n",
      "|    policy_gradient_loss | 0.0302     |\n",
      "|    std                  | 0.116      |\n",
      "|    value_loss           | 38.1       |\n",
      "----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 359      |\n",
      "|    ep_rew_mean          | 3.08e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 188      |\n",
      "|    iterations           | 7        |\n",
      "|    time_elapsed         | 75       |\n",
      "|    total_timesteps      | 14336    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 0.880979 |\n",
      "|    clip_fraction        | 0.0464   |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.742    |\n",
      "|    explained_variance   | 0.899    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 5.37     |\n",
      "|    n_updates            | 1570     |\n",
      "|    policy_gradient_loss | 0.0132   |\n",
      "|    std                  | 0.114    |\n",
      "|    value_loss           | 178      |\n",
      "--------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 365         |\n",
      "|    ep_rew_mean          | 3.14e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 191         |\n",
      "|    iterations           | 8           |\n",
      "|    time_elapsed         | 85          |\n",
      "|    total_timesteps      | 16384       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.057572506 |\n",
      "|    clip_fraction        | 0.0694      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.76        |\n",
      "|    explained_variance   | -1.35       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 20          |\n",
      "|    n_updates            | 1580        |\n",
      "|    policy_gradient_loss | 0.0112      |\n",
      "|    std                  | 0.113       |\n",
      "|    value_loss           | 202         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 363        |\n",
      "|    ep_rew_mean          | 3.12e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 193        |\n",
      "|    iterations           | 9          |\n",
      "|    time_elapsed         | 95         |\n",
      "|    total_timesteps      | 18432      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.25556487 |\n",
      "|    clip_fraction        | 0.161      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.774      |\n",
      "|    explained_variance   | -0.105     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 45.9       |\n",
      "|    n_updates            | 1590       |\n",
      "|    policy_gradient_loss | 0.0141     |\n",
      "|    std                  | 0.11       |\n",
      "|    value_loss           | 43.6       |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 360       |\n",
      "|    ep_rew_mean          | 3.09e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 196       |\n",
      "|    iterations           | 10        |\n",
      "|    time_elapsed         | 104       |\n",
      "|    total_timesteps      | 20480     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.5999752 |\n",
      "|    clip_fraction        | 0.19      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.791     |\n",
      "|    explained_variance   | 0.685     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 23.6      |\n",
      "|    n_updates            | 1600      |\n",
      "|    policy_gradient_loss | 0.0104    |\n",
      "|    std                  | 0.109     |\n",
      "|    value_loss           | 1.81e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 359      |\n",
      "|    ep_rew_mean          | 3.07e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 195      |\n",
      "|    iterations           | 11       |\n",
      "|    time_elapsed         | 114      |\n",
      "|    total_timesteps      | 22528    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 0.989316 |\n",
      "|    clip_fraction        | 0.0937   |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.794    |\n",
      "|    explained_variance   | 0.965    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 1.49e+03 |\n",
      "|    n_updates            | 1610     |\n",
      "|    policy_gradient_loss | 0.00978  |\n",
      "|    std                  | 0.109    |\n",
      "|    value_loss           | 553      |\n",
      "--------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 353        |\n",
      "|    ep_rew_mean          | 3.01e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 192        |\n",
      "|    iterations           | 12         |\n",
      "|    time_elapsed         | 127        |\n",
      "|    total_timesteps      | 24576      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.17424175 |\n",
      "|    clip_fraction        | 0.101      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.798      |\n",
      "|    explained_variance   | 0.667      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 1.39e+03   |\n",
      "|    n_updates            | 1620       |\n",
      "|    policy_gradient_loss | 0.0112     |\n",
      "|    std                  | 0.108      |\n",
      "|    value_loss           | 2.48e+03   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 356        |\n",
      "|    ep_rew_mean          | 3.05e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 193        |\n",
      "|    iterations           | 13         |\n",
      "|    time_elapsed         | 137        |\n",
      "|    total_timesteps      | 26624      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.05304908 |\n",
      "|    clip_fraction        | 0.0664     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.803      |\n",
      "|    explained_variance   | 0.844      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 64.1       |\n",
      "|    n_updates            | 1630       |\n",
      "|    policy_gradient_loss | 0.0155     |\n",
      "|    std                  | 0.108      |\n",
      "|    value_loss           | 4.62e+03   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 360          |\n",
      "|    ep_rew_mean          | 3.09e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 194          |\n",
      "|    iterations           | 14           |\n",
      "|    time_elapsed         | 147          |\n",
      "|    total_timesteps      | 28672        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0090817595 |\n",
      "|    clip_fraction        | 0.132        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.807        |\n",
      "|    explained_variance   | -2.85        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 20.4         |\n",
      "|    n_updates            | 1640         |\n",
      "|    policy_gradient_loss | 0.00992      |\n",
      "|    std                  | 0.108        |\n",
      "|    value_loss           | 170          |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 351        |\n",
      "|    ep_rew_mean          | 2.99e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 194        |\n",
      "|    iterations           | 15         |\n",
      "|    time_elapsed         | 157        |\n",
      "|    total_timesteps      | 30720      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.47372687 |\n",
      "|    clip_fraction        | 0.236      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.822      |\n",
      "|    explained_variance   | 0.424      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 8.24       |\n",
      "|    n_updates            | 1650       |\n",
      "|    policy_gradient_loss | 0.062      |\n",
      "|    std                  | 0.105      |\n",
      "|    value_loss           | 28.4       |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 343        |\n",
      "|    ep_rew_mean          | 2.91e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 196        |\n",
      "|    iterations           | 16         |\n",
      "|    time_elapsed         | 167        |\n",
      "|    total_timesteps      | 32768      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.15681988 |\n",
      "|    clip_fraction        | 0.133      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.832      |\n",
      "|    explained_variance   | 0.565      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 38.1       |\n",
      "|    n_updates            | 1660       |\n",
      "|    policy_gradient_loss | -0.011     |\n",
      "|    std                  | 0.105      |\n",
      "|    value_loss           | 1.3e+03    |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 339         |\n",
      "|    ep_rew_mean          | 2.86e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 197         |\n",
      "|    iterations           | 17          |\n",
      "|    time_elapsed         | 176         |\n",
      "|    total_timesteps      | 34816       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.047362752 |\n",
      "|    clip_fraction        | 0.11        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.835       |\n",
      "|    explained_variance   | 0.939       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.9e+03     |\n",
      "|    n_updates            | 1670        |\n",
      "|    policy_gradient_loss | 0.00887     |\n",
      "|    std                  | 0.105       |\n",
      "|    value_loss           | 2.46e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 319        |\n",
      "|    ep_rew_mean          | 2.65e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 198        |\n",
      "|    iterations           | 18         |\n",
      "|    time_elapsed         | 185        |\n",
      "|    total_timesteps      | 36864      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.51646733 |\n",
      "|    clip_fraction        | 0.241      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.843      |\n",
      "|    explained_variance   | 0.973      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 36.5       |\n",
      "|    n_updates            | 1680       |\n",
      "|    policy_gradient_loss | 0.0146     |\n",
      "|    std                  | 0.104      |\n",
      "|    value_loss           | 239        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 319         |\n",
      "|    ep_rew_mean          | 2.65e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 200         |\n",
      "|    iterations           | 19          |\n",
      "|    time_elapsed         | 194         |\n",
      "|    total_timesteps      | 38912       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.118696526 |\n",
      "|    clip_fraction        | 0.0298      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.848       |\n",
      "|    explained_variance   | 0.945       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.22e+03    |\n",
      "|    n_updates            | 1690        |\n",
      "|    policy_gradient_loss | 0.0117      |\n",
      "|    std                  | 0.104       |\n",
      "|    value_loss           | 3.9e+03     |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 319        |\n",
      "|    ep_rew_mean          | 2.66e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 201        |\n",
      "|    iterations           | 20         |\n",
      "|    time_elapsed         | 203        |\n",
      "|    total_timesteps      | 40960      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.09122539 |\n",
      "|    clip_fraction        | 0.243      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.859      |\n",
      "|    explained_variance   | 0.981      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 25.6       |\n",
      "|    n_updates            | 1700       |\n",
      "|    policy_gradient_loss | 0.0443     |\n",
      "|    std                  | 0.102      |\n",
      "|    value_loss           | 126        |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 319       |\n",
      "|    ep_rew_mean          | 2.66e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 202       |\n",
      "|    iterations           | 21        |\n",
      "|    time_elapsed         | 212       |\n",
      "|    total_timesteps      | 43008     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 16.768503 |\n",
      "|    clip_fraction        | 0.923     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.871     |\n",
      "|    explained_variance   | 0.919     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 124       |\n",
      "|    n_updates            | 1710      |\n",
      "|    policy_gradient_loss | 0.187     |\n",
      "|    std                  | 0.101     |\n",
      "|    value_loss           | 137       |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 323        |\n",
      "|    ep_rew_mean          | 2.7e+03    |\n",
      "| time/                   |            |\n",
      "|    fps                  | 203        |\n",
      "|    iterations           | 22         |\n",
      "|    time_elapsed         | 221        |\n",
      "|    total_timesteps      | 45056      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.07689613 |\n",
      "|    clip_fraction        | 0.183      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.896      |\n",
      "|    explained_variance   | 0.344      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.38       |\n",
      "|    n_updates            | 1720       |\n",
      "|    policy_gradient_loss | 0.00567    |\n",
      "|    std                  | 0.0963     |\n",
      "|    value_loss           | 14.8       |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 316        |\n",
      "|    ep_rew_mean          | 2.62e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 204        |\n",
      "|    iterations           | 23         |\n",
      "|    time_elapsed         | 230        |\n",
      "|    total_timesteps      | 47104      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.37951568 |\n",
      "|    clip_fraction        | 0.24       |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.942      |\n",
      "|    explained_variance   | -0.679     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 4.96       |\n",
      "|    n_updates            | 1730       |\n",
      "|    policy_gradient_loss | 0.0345     |\n",
      "|    std                  | 0.092      |\n",
      "|    value_loss           | 46.5       |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 312          |\n",
      "|    ep_rew_mean          | 2.58e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 204          |\n",
      "|    iterations           | 24           |\n",
      "|    time_elapsed         | 239          |\n",
      "|    total_timesteps      | 49152        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0037876756 |\n",
      "|    clip_fraction        | 0.0545       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.968        |\n",
      "|    explained_variance   | 0.174        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 6.4e+03      |\n",
      "|    n_updates            | 1740         |\n",
      "|    policy_gradient_loss | -0.00492     |\n",
      "|    std                  | 0.0919       |\n",
      "|    value_loss           | 2.98e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 316        |\n",
      "|    ep_rew_mean          | 2.63e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 205        |\n",
      "|    iterations           | 25         |\n",
      "|    time_elapsed         | 248        |\n",
      "|    total_timesteps      | 51200      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.60806036 |\n",
      "|    clip_fraction        | 0.0981     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.968      |\n",
      "|    explained_variance   | 0.895      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 227        |\n",
      "|    n_updates            | 1750       |\n",
      "|    policy_gradient_loss | 0.000591   |\n",
      "|    std                  | 0.0918     |\n",
      "|    value_loss           | 483        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 320         |\n",
      "|    ep_rew_mean          | 2.68e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 205         |\n",
      "|    iterations           | 26          |\n",
      "|    time_elapsed         | 259         |\n",
      "|    total_timesteps      | 53248       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009598814 |\n",
      "|    clip_fraction        | 0.0666      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.975       |\n",
      "|    explained_variance   | -0.0483     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 18.3        |\n",
      "|    n_updates            | 1760        |\n",
      "|    policy_gradient_loss | 0.00438     |\n",
      "|    std                  | 0.0908      |\n",
      "|    value_loss           | 65.6        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 320         |\n",
      "|    ep_rew_mean          | 2.68e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 206         |\n",
      "|    iterations           | 27          |\n",
      "|    time_elapsed         | 268         |\n",
      "|    total_timesteps      | 55296       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.013394855 |\n",
      "|    clip_fraction        | 0.063       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.984       |\n",
      "|    explained_variance   | 0.00237     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 46.3        |\n",
      "|    n_updates            | 1770        |\n",
      "|    policy_gradient_loss | 0.0194      |\n",
      "|    std                  | 0.09        |\n",
      "|    value_loss           | 62.1        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 324         |\n",
      "|    ep_rew_mean          | 2.73e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 206         |\n",
      "|    iterations           | 28          |\n",
      "|    time_elapsed         | 277         |\n",
      "|    total_timesteps      | 57344       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009437308 |\n",
      "|    clip_fraction        | 0.0401      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.991       |\n",
      "|    explained_variance   | 0.817       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 152         |\n",
      "|    n_updates            | 1780        |\n",
      "|    policy_gradient_loss | 0.0105      |\n",
      "|    std                  | 0.0896      |\n",
      "|    value_loss           | 1.29e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 320         |\n",
      "|    ep_rew_mean          | 2.69e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 206         |\n",
      "|    iterations           | 29          |\n",
      "|    time_elapsed         | 287         |\n",
      "|    total_timesteps      | 59392       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.109366104 |\n",
      "|    clip_fraction        | 0.131       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.01        |\n",
      "|    explained_variance   | 0.709       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.94        |\n",
      "|    n_updates            | 1790        |\n",
      "|    policy_gradient_loss | 0.00689     |\n",
      "|    std                  | 0.0858      |\n",
      "|    value_loss           | 14.6        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 320         |\n",
      "|    ep_rew_mean          | 2.7e+03     |\n",
      "| time/                   |             |\n",
      "|    fps                  | 207         |\n",
      "|    iterations           | 30          |\n",
      "|    time_elapsed         | 296         |\n",
      "|    total_timesteps      | 61440       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.019132426 |\n",
      "|    clip_fraction        | 0.0185      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.04        |\n",
      "|    explained_variance   | 0.875       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 117         |\n",
      "|    n_updates            | 1800        |\n",
      "|    policy_gradient_loss | 0.00512     |\n",
      "|    std                  | 0.0857      |\n",
      "|    value_loss           | 1.43e+03    |\n",
      "-----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 309      |\n",
      "|    ep_rew_mean          | 2.48e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 207      |\n",
      "|    iterations           | 31       |\n",
      "|    time_elapsed         | 305      |\n",
      "|    total_timesteps      | 63488    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 1.994001 |\n",
      "|    clip_fraction        | 0.0777   |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.06     |\n",
      "|    explained_variance   | 0.718    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 6.36     |\n",
      "|    n_updates            | 1810     |\n",
      "|    policy_gradient_loss | 0.0122   |\n",
      "|    std                  | 0.0826   |\n",
      "|    value_loss           | 12.9     |\n",
      "--------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 270        |\n",
      "|    ep_rew_mean          | 1.82e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 208        |\n",
      "|    iterations           | 32         |\n",
      "|    time_elapsed         | 314        |\n",
      "|    total_timesteps      | 65536      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01581019 |\n",
      "|    clip_fraction        | 0.13       |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.08       |\n",
      "|    explained_variance   | 0.221      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 2.33e+04   |\n",
      "|    n_updates            | 1820       |\n",
      "|    policy_gradient_loss | 0.00684    |\n",
      "|    std                  | 0.0825     |\n",
      "|    value_loss           | 8.29e+04   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 201          |\n",
      "|    ep_rew_mean          | 802          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 208          |\n",
      "|    iterations           | 33           |\n",
      "|    time_elapsed         | 323          |\n",
      "|    total_timesteps      | 67584        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0030897881 |\n",
      "|    clip_fraction        | 0.0104       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.08         |\n",
      "|    explained_variance   | 0.779        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 9.42e+03     |\n",
      "|    n_updates            | 1830         |\n",
      "|    policy_gradient_loss | -0.00248     |\n",
      "|    std                  | 0.0825       |\n",
      "|    value_loss           | 5.18e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 153          |\n",
      "|    ep_rew_mean          | 35.1         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 209          |\n",
      "|    iterations           | 34           |\n",
      "|    time_elapsed         | 332          |\n",
      "|    total_timesteps      | 69632        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0065469136 |\n",
      "|    clip_fraction        | 0.0423       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.08         |\n",
      "|    explained_variance   | 0.867        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.48e+04     |\n",
      "|    n_updates            | 1840         |\n",
      "|    policy_gradient_loss | -0.00688     |\n",
      "|    std                  | 0.0825       |\n",
      "|    value_loss           | 8.26e+04     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 97.9       |\n",
      "|    ep_rew_mean          | -768       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 209        |\n",
      "|    iterations           | 35         |\n",
      "|    time_elapsed         | 341        |\n",
      "|    total_timesteps      | 71680      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01100356 |\n",
      "|    clip_fraction        | 0.0729     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.08       |\n",
      "|    explained_variance   | 0.908      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 2.01e+04   |\n",
      "|    n_updates            | 1850       |\n",
      "|    policy_gradient_loss | -0.00912   |\n",
      "|    std                  | 0.0825     |\n",
      "|    value_loss           | 5.38e+04   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 111         |\n",
      "|    ep_rew_mean          | -570        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 36          |\n",
      "|    time_elapsed         | 352         |\n",
      "|    total_timesteps      | 73728       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008891087 |\n",
      "|    clip_fraction        | 0.0702      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.08        |\n",
      "|    explained_variance   | 0.939       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.72e+04    |\n",
      "|    n_updates            | 1860        |\n",
      "|    policy_gradient_loss | -0.0103     |\n",
      "|    std                  | 0.0824      |\n",
      "|    value_loss           | 3.08e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 110          |\n",
      "|    ep_rew_mean          | -513         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 209          |\n",
      "|    iterations           | 37           |\n",
      "|    time_elapsed         | 361          |\n",
      "|    total_timesteps      | 75776        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0046361834 |\n",
      "|    clip_fraction        | 0.0222       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.08         |\n",
      "|    explained_variance   | 0.879        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 784          |\n",
      "|    n_updates            | 1870         |\n",
      "|    policy_gradient_loss | -0.00239     |\n",
      "|    std                  | 0.0823       |\n",
      "|    value_loss           | 3.52e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 126         |\n",
      "|    ep_rew_mean          | -262        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 38          |\n",
      "|    time_elapsed         | 370         |\n",
      "|    total_timesteps      | 77824       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.002519243 |\n",
      "|    clip_fraction        | 0.0211      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.08        |\n",
      "|    explained_variance   | 0.928       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 641         |\n",
      "|    n_updates            | 1880        |\n",
      "|    policy_gradient_loss | -0.00388    |\n",
      "|    std                  | 0.0823      |\n",
      "|    value_loss           | 6.25e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 140         |\n",
      "|    ep_rew_mean          | -48.4       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 210         |\n",
      "|    iterations           | 39          |\n",
      "|    time_elapsed         | 380         |\n",
      "|    total_timesteps      | 79872       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003286648 |\n",
      "|    clip_fraction        | 0.0251      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.08        |\n",
      "|    explained_variance   | 0.952       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 560         |\n",
      "|    n_updates            | 1890        |\n",
      "|    policy_gradient_loss | -0.0036     |\n",
      "|    std                  | 0.0822      |\n",
      "|    value_loss           | 3.21e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 154         |\n",
      "|    ep_rew_mean          | 150         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 210         |\n",
      "|    iterations           | 40          |\n",
      "|    time_elapsed         | 389         |\n",
      "|    total_timesteps      | 81920       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009747768 |\n",
      "|    clip_fraction        | 0.0318      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.08        |\n",
      "|    explained_variance   | 0.956       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 150         |\n",
      "|    n_updates            | 1900        |\n",
      "|    policy_gradient_loss | 0.00738     |\n",
      "|    std                  | 0.0821      |\n",
      "|    value_loss           | 1.18e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 167         |\n",
      "|    ep_rew_mean          | 409         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 210         |\n",
      "|    iterations           | 41          |\n",
      "|    time_elapsed         | 398         |\n",
      "|    total_timesteps      | 83968       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.046376273 |\n",
      "|    clip_fraction        | 0.0764      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.08        |\n",
      "|    explained_variance   | 0.98        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 48.8        |\n",
      "|    n_updates            | 1910        |\n",
      "|    policy_gradient_loss | 0.0116      |\n",
      "|    std                  | 0.0817      |\n",
      "|    value_loss           | 643         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 187        |\n",
      "|    ep_rew_mean          | 676        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 210        |\n",
      "|    iterations           | 42         |\n",
      "|    time_elapsed         | 407        |\n",
      "|    total_timesteps      | 86016      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.12413182 |\n",
      "|    clip_fraction        | 0.0888     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.09       |\n",
      "|    explained_variance   | 0.978      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 14.4       |\n",
      "|    n_updates            | 1920       |\n",
      "|    policy_gradient_loss | 0.0069     |\n",
      "|    std                  | 0.0814     |\n",
      "|    value_loss           | 342        |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 183          |\n",
      "|    ep_rew_mean          | 770          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 211          |\n",
      "|    iterations           | 43           |\n",
      "|    time_elapsed         | 416          |\n",
      "|    total_timesteps      | 88064        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0023063696 |\n",
      "|    clip_fraction        | 0.0353       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.09         |\n",
      "|    explained_variance   | 0.938        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 92.2         |\n",
      "|    n_updates            | 1930         |\n",
      "|    policy_gradient_loss | 0.00698      |\n",
      "|    std                  | 0.0813       |\n",
      "|    value_loss           | 1.2e+03      |\n",
      "------------------------------------------\n",
      "-------------------------------------------\n",
      "| rollout/                |               |\n",
      "|    ep_len_mean          | 191           |\n",
      "|    ep_rew_mean          | 911           |\n",
      "| time/                   |               |\n",
      "|    fps                  | 211           |\n",
      "|    iterations           | 44            |\n",
      "|    time_elapsed         | 426           |\n",
      "|    total_timesteps      | 90112         |\n",
      "| train/                  |               |\n",
      "|    approx_kl            | 0.00086451333 |\n",
      "|    clip_fraction        | 0.0211        |\n",
      "|    clip_range           | 0.2           |\n",
      "|    entropy_loss         | 1.09          |\n",
      "|    explained_variance   | 0.986         |\n",
      "|    learning_rate        | 0.0003        |\n",
      "|    loss                 | 361           |\n",
      "|    n_updates            | 1940          |\n",
      "|    policy_gradient_loss | 0.00066       |\n",
      "|    std                  | 0.0813        |\n",
      "|    value_loss           | 2.59e+03      |\n",
      "-------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 198         |\n",
      "|    ep_rew_mean          | 1.04e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 211         |\n",
      "|    iterations           | 45          |\n",
      "|    time_elapsed         | 435         |\n",
      "|    total_timesteps      | 92160       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005604318 |\n",
      "|    clip_fraction        | 0.0329      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.09        |\n",
      "|    explained_variance   | 0.993       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 119         |\n",
      "|    n_updates            | 1950        |\n",
      "|    policy_gradient_loss | 0.000506    |\n",
      "|    std                  | 0.0812      |\n",
      "|    value_loss           | 2.61e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 198          |\n",
      "|    ep_rew_mean          | 1.03e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 211          |\n",
      "|    iterations           | 46           |\n",
      "|    time_elapsed         | 444          |\n",
      "|    total_timesteps      | 94208        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0054782964 |\n",
      "|    clip_fraction        | 0.0406       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.09         |\n",
      "|    explained_variance   | 0.986        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 250          |\n",
      "|    n_updates            | 1960         |\n",
      "|    policy_gradient_loss | 0.00971      |\n",
      "|    std                  | 0.081        |\n",
      "|    value_loss           | 1.54e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 205        |\n",
      "|    ep_rew_mean          | 1.14e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 212        |\n",
      "|    iterations           | 47         |\n",
      "|    time_elapsed         | 454        |\n",
      "|    total_timesteps      | 96256      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.03163623 |\n",
      "|    clip_fraction        | 0.0521     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.1        |\n",
      "|    explained_variance   | 0.991      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 15.4       |\n",
      "|    n_updates            | 1970       |\n",
      "|    policy_gradient_loss | -0.000431  |\n",
      "|    std                  | 0.0805     |\n",
      "|    value_loss           | 414        |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 209          |\n",
      "|    ep_rew_mean          | 1.18e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 212          |\n",
      "|    iterations           | 48           |\n",
      "|    time_elapsed         | 463          |\n",
      "|    total_timesteps      | 98304        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0032501013 |\n",
      "|    clip_fraction        | 0.023        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.1          |\n",
      "|    explained_variance   | 0.978        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.9e+03      |\n",
      "|    n_updates            | 1980         |\n",
      "|    policy_gradient_loss | 0.00325      |\n",
      "|    std                  | 0.0805       |\n",
      "|    value_loss           | 4.71e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 197         |\n",
      "|    ep_rew_mean          | 1.03e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 212         |\n",
      "|    iterations           | 49          |\n",
      "|    time_elapsed         | 472         |\n",
      "|    total_timesteps      | 100352      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008104316 |\n",
      "|    clip_fraction        | 0.0415      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.1         |\n",
      "|    explained_variance   | 0.941       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 109         |\n",
      "|    n_updates            | 1990        |\n",
      "|    policy_gradient_loss | 0.00485     |\n",
      "|    std                  | 0.0805      |\n",
      "|    value_loss           | 732         |\n",
      "-----------------------------------------\n",
      "--- 476.00412940979004 seconds ---\n"
     ]
    }
   ],
   "source": [
    "for p in [0.0, 0.1, 0.5, 0.9, 0.95, 1.0]:\n",
    "    model = PPO.load(\"model_backup/acc-2000000-64-64-64-64-100000-0.1\")\n",
    "    model.set_env(env)\n",
    "    \n",
    "    print(\"p=\",p)\n",
    "\n",
    "    env.init_polytopes(p,retrain_polytopes)\n",
    "    env.unwrapped.INCLUDE_UNWINNABLE = False\n",
    "    start_time = time.time()\n",
    "    model=model.learn(total_timesteps=training_episode_length)\n",
    "    print(\"--- %s seconds ---\" % (time.time() - start_time))\n",
    "\n",
    "    model.save(\"model_backup/acc-2000000-64-64-64-64-100000-100000-\"+str(p))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.0\n",
      "Overall:\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/steuber/anaconda3/envs/nnequiv-tf1/lib/python3.7/site-packages/stable_baselines3/common/evaluation.py:69: UserWarning: Evaluation environment is not wrapped with a ``Monitor`` wrapper. This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. Consider wrapping environment first with ``Monitor`` wrapper.\n",
      "  UserWarning,\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:1794.36 +/- 2335.86\n",
      "Focus Polytopes:\n",
      "mean_reward:4092.76 +/- 3.23\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.1\n",
      "Overall:\n",
      "mean_reward:3922.05 +/- 469.46\n",
      "Focus Polytopes:\n",
      "mean_reward:2698.67 +/- 2560.74\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.5\n",
      "Overall:\n",
      "mean_reward:1565.85 +/- 2398.24\n",
      "Focus Polytopes:\n",
      "mean_reward:-512.65 +/- 14.74\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.9\n",
      "Overall:\n",
      "mean_reward:3396.02 +/- 1602.89\n",
      "Focus Polytopes:\n",
      "mean_reward:4086.05 +/- 1.35\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.95\n",
      "Overall:\n",
      "mean_reward:-799.06 +/- 4.62\n",
      "Focus Polytopes:\n",
      "mean_reward:-745.90 +/- 13.90\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 1.0\n",
      "Overall:\n",
      "mean_reward:1343.46 +/- 2703.59\n",
      "Focus Polytopes:\n",
      "mean_reward:-1062.23 +/- 2197.96\n"
     ]
    }
   ],
   "source": [
    "# Performance of models on focus polytopes only?\n",
    "for p in [0.0, 0.1, 0.5, 0.9, 0.95, 1.0]:\n",
    "    results_overall[p]=[]\n",
    "    results_polys[p]=[]\n",
    "    model = PPO.load(\"model_backup/acc-2000000-64-64-64-64-100000-100000-\"+str(p))\n",
    "    model.set_env(env)\n",
    "    print(\"p=\",p)\n",
    "    \n",
    "    print(\"Overall:\")\n",
    "    env.init_polytopes(1.0,[])\n",
    "    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)\n",
    "    results_overall[p].append((mean_reward, std_reward))\n",
    "    print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")\n",
    "    \n",
    "    print(\"Focus Polytopes:\")\n",
    "    env.init_polytopes(0.0,retrain_polytopes)\n",
    "    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)\n",
    "    results_polys[p].append((mean_reward, std_reward))\n",
    "    print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{0.0: [(1794.3577423461675, 2335.856322265823)], 0.1: [(3922.052573099375, 469.46124813879163)], 0.5: [(1565.8549169559478, 2398.2350640688524)], 0.9: [(3396.023815936208, 1602.8926549522741)], 0.95: [(-799.0567969540358, 4.618851365744381)], 1.0: [(1343.4610378121138, 2703.5927546197913)]}\n",
      "{0.0: [(4092.762310071349, 3.2332930538565092)], 0.1: [(2698.6682721825837, 2560.73957567821)], 0.5: [(-512.6466750481129, 14.741641097988378)], 0.9: [(4086.0508041523694, 1.3531052203752272)], 0.95: [(-745.8993273051977, 13.904470316282218)], 1.0: [(-1062.232770757675, 2197.96044819203)]}\n"
     ]
    }
   ],
   "source": [
    "print(results_overall)\n",
    "print(results_polys)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.0\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 410      |\n",
      "|    ep_rew_mean     | 3.98e+03 |\n",
      "| time/              |          |\n",
      "|    fps             | 438      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 4        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 410         |\n",
      "|    ep_rew_mean          | 3.98e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 280         |\n",
      "|    iterations           | 2           |\n",
      "|    time_elapsed         | 14          |\n",
      "|    total_timesteps      | 4096        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.012362827 |\n",
      "|    clip_fraction        | 0.0751      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.93        |\n",
      "|    explained_variance   | 0.399       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.717       |\n",
      "|    n_updates            | 2010        |\n",
      "|    policy_gradient_loss | -0.00466    |\n",
      "|    std                  | 0.0346      |\n",
      "|    value_loss           | 6.98        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 410         |\n",
      "|    ep_rew_mean          | 3.98e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 253         |\n",
      "|    iterations           | 3           |\n",
      "|    time_elapsed         | 24          |\n",
      "|    total_timesteps      | 6144        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.013151471 |\n",
      "|    clip_fraction        | 0.0999      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.99        |\n",
      "|    explained_variance   | -0.0326     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.18        |\n",
      "|    n_updates            | 2020        |\n",
      "|    policy_gradient_loss | -0.00797    |\n",
      "|    std                  | 0.032       |\n",
      "|    value_loss           | 5.11        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 410         |\n",
      "|    ep_rew_mean          | 3.98e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 250         |\n",
      "|    iterations           | 4           |\n",
      "|    time_elapsed         | 32          |\n",
      "|    total_timesteps      | 8192        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.024587963 |\n",
      "|    clip_fraction        | 0.143       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.05        |\n",
      "|    explained_variance   | -0.499      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.658       |\n",
      "|    n_updates            | 2030        |\n",
      "|    policy_gradient_loss | -0.0106     |\n",
      "|    std                  | 0.0306      |\n",
      "|    value_loss           | 4.58        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 410         |\n",
      "|    ep_rew_mean          | 3.98e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 249         |\n",
      "|    iterations           | 5           |\n",
      "|    time_elapsed         | 41          |\n",
      "|    total_timesteps      | 10240       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008222705 |\n",
      "|    clip_fraction        | 0.0779      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.08        |\n",
      "|    explained_variance   | -1.49       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.77        |\n",
      "|    n_updates            | 2040        |\n",
      "|    policy_gradient_loss | -0.00239    |\n",
      "|    std                  | 0.0299      |\n",
      "|    value_loss           | 4.62        |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 277        |\n",
      "|    ep_rew_mean          | 2.04e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 242        |\n",
      "|    iterations           | 6          |\n",
      "|    time_elapsed         | 50         |\n",
      "|    total_timesteps      | 12288      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.14351755 |\n",
      "|    clip_fraction        | 0.108      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.14       |\n",
      "|    explained_variance   | -0.554     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 0.236      |\n",
      "|    n_updates            | 2050       |\n",
      "|    policy_gradient_loss | -0.00266   |\n",
      "|    std                  | 0.0276     |\n",
      "|    value_loss           | 1.63       |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 197          |\n",
      "|    ep_rew_mean          | 869          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 239          |\n",
      "|    iterations           | 7            |\n",
      "|    time_elapsed         | 59           |\n",
      "|    total_timesteps      | 14336        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0016255763 |\n",
      "|    clip_fraction        | 0.0117       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 2.18         |\n",
      "|    explained_variance   | 0.000426     |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 13.5         |\n",
      "|    n_updates            | 2060         |\n",
      "|    policy_gradient_loss | -0.000454    |\n",
      "|    std                  | 0.0272       |\n",
      "|    value_loss           | 1.97e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 171          |\n",
      "|    ep_rew_mean          | 487          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 224          |\n",
      "|    iterations           | 8            |\n",
      "|    time_elapsed         | 73           |\n",
      "|    total_timesteps      | 16384        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0030633854 |\n",
      "|    clip_fraction        | 0.0399       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 2.19         |\n",
      "|    explained_variance   | 0.791        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 85.5         |\n",
      "|    n_updates            | 2070         |\n",
      "|    policy_gradient_loss | 0.00022      |\n",
      "|    std                  | 0.027        |\n",
      "|    value_loss           | 1.5e+04      |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 111         |\n",
      "|    ep_rew_mean          | -379        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 196         |\n",
      "|    iterations           | 9           |\n",
      "|    time_elapsed         | 93          |\n",
      "|    total_timesteps      | 18432       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011207394 |\n",
      "|    clip_fraction        | 0.0921      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.2         |\n",
      "|    explained_variance   | 0.885       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 167         |\n",
      "|    n_updates            | 2080        |\n",
      "|    policy_gradient_loss | 0.00262     |\n",
      "|    std                  | 0.0268      |\n",
      "|    value_loss           | 8.78e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 78.7        |\n",
      "|    ep_rew_mean          | -858        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 185         |\n",
      "|    iterations           | 10          |\n",
      "|    time_elapsed         | 110         |\n",
      "|    total_timesteps      | 20480       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007108792 |\n",
      "|    clip_fraction        | 0.0409      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.21        |\n",
      "|    explained_variance   | 0.839       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.36        |\n",
      "|    n_updates            | 2090        |\n",
      "|    policy_gradient_loss | -0.000817   |\n",
      "|    std                  | 0.0265      |\n",
      "|    value_loss           | 1.11e+04    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 91         |\n",
      "|    ep_rew_mean          | -678       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 178        |\n",
      "|    iterations           | 11         |\n",
      "|    time_elapsed         | 126        |\n",
      "|    total_timesteps      | 22528      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00563364 |\n",
      "|    clip_fraction        | 0.0512     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.22       |\n",
      "|    explained_variance   | 0.963      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 79.5       |\n",
      "|    n_updates            | 2100       |\n",
      "|    policy_gradient_loss | 0.00244    |\n",
      "|    std                  | 0.0261     |\n",
      "|    value_loss           | 3.88e+03   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 74.6       |\n",
      "|    ep_rew_mean          | -918       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 177        |\n",
      "|    iterations           | 12         |\n",
      "|    time_elapsed         | 138        |\n",
      "|    total_timesteps      | 24576      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00875468 |\n",
      "|    clip_fraction        | 0.049      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.24       |\n",
      "|    explained_variance   | 0.934      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 31.6       |\n",
      "|    n_updates            | 2110       |\n",
      "|    policy_gradient_loss | 0.0017     |\n",
      "|    std                  | 0.0256     |\n",
      "|    value_loss           | 3.7e+03    |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 70.5         |\n",
      "|    ep_rew_mean          | -978         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 178          |\n",
      "|    iterations           | 13           |\n",
      "|    time_elapsed         | 148          |\n",
      "|    total_timesteps      | 26624        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0042320997 |\n",
      "|    clip_fraction        | 0.0415       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 2.26         |\n",
      "|    explained_variance   | 0.998        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 33.7         |\n",
      "|    n_updates            | 2120         |\n",
      "|    policy_gradient_loss | -0.00114     |\n",
      "|    std                  | 0.0251       |\n",
      "|    value_loss           | 275          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 70.5        |\n",
      "|    ep_rew_mean          | -977        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 178         |\n",
      "|    iterations           | 14          |\n",
      "|    time_elapsed         | 160         |\n",
      "|    total_timesteps      | 28672       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008594116 |\n",
      "|    clip_fraction        | 0.135       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.28        |\n",
      "|    explained_variance   | 0.97        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.412       |\n",
      "|    n_updates            | 2130        |\n",
      "|    policy_gradient_loss | -0.000548   |\n",
      "|    std                  | 0.0245      |\n",
      "|    value_loss           | 3.66e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 66.4        |\n",
      "|    ep_rew_mean          | -1.04e+03   |\n",
      "| time/                   |             |\n",
      "|    fps                  | 174         |\n",
      "|    iterations           | 15          |\n",
      "|    time_elapsed         | 175         |\n",
      "|    total_timesteps      | 30720       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.014693956 |\n",
      "|    clip_fraction        | 0.0923      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.3         |\n",
      "|    explained_variance   | 0.917       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.96        |\n",
      "|    n_updates            | 2140        |\n",
      "|    policy_gradient_loss | -0.00244    |\n",
      "|    std                  | 0.0241      |\n",
      "|    value_loss           | 7.57e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 82.8         |\n",
      "|    ep_rew_mean          | -796         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 175          |\n",
      "|    iterations           | 16           |\n",
      "|    time_elapsed         | 186          |\n",
      "|    total_timesteps      | 32768        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0079075275 |\n",
      "|    clip_fraction        | 0.0979       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 2.32         |\n",
      "|    explained_variance   | 0.958        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 0.306        |\n",
      "|    n_updates            | 2150         |\n",
      "|    policy_gradient_loss | -0.00455     |\n",
      "|    std                  | 0.0234       |\n",
      "|    value_loss           | 3.78e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 91           |\n",
      "|    ep_rew_mean          | -676         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 173          |\n",
      "|    iterations           | 17           |\n",
      "|    time_elapsed         | 200          |\n",
      "|    total_timesteps      | 34816        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0063619595 |\n",
      "|    clip_fraction        | 0.0614       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 2.36         |\n",
      "|    explained_variance   | 0.999        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.57         |\n",
      "|    n_updates            | 2160         |\n",
      "|    policy_gradient_loss | -0.00289     |\n",
      "|    std                  | 0.0223       |\n",
      "|    value_loss           | 14           |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 91          |\n",
      "|    ep_rew_mean          | -675        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 171         |\n",
      "|    iterations           | 18          |\n",
      "|    time_elapsed         | 214         |\n",
      "|    total_timesteps      | 36864       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.019352708 |\n",
      "|    clip_fraction        | 0.104       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.4         |\n",
      "|    explained_variance   | 0.899       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.77        |\n",
      "|    n_updates            | 2170        |\n",
      "|    policy_gradient_loss | -0.00137    |\n",
      "|    std                  | 0.0217      |\n",
      "|    value_loss           | 3.93e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 99.2        |\n",
      "|    ep_rew_mean          | -554        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 173         |\n",
      "|    iterations           | 19          |\n",
      "|    time_elapsed         | 224         |\n",
      "|    total_timesteps      | 38912       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004769553 |\n",
      "|    clip_fraction        | 0.0621      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.42        |\n",
      "|    explained_variance   | 0.961       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.889       |\n",
      "|    n_updates            | 2180        |\n",
      "|    policy_gradient_loss | -0.00315    |\n",
      "|    std                  | 0.0213      |\n",
      "|    value_loss           | 4.57e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 107          |\n",
      "|    ep_rew_mean          | -433         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 173          |\n",
      "|    iterations           | 20           |\n",
      "|    time_elapsed         | 236          |\n",
      "|    total_timesteps      | 40960        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0049392404 |\n",
      "|    clip_fraction        | 0.0411       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 2.45         |\n",
      "|    explained_variance   | 0.826        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 11           |\n",
      "|    n_updates            | 2190         |\n",
      "|    policy_gradient_loss | -0.00329     |\n",
      "|    std                  | 0.0204       |\n",
      "|    value_loss           | 8.44e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 86.9        |\n",
      "|    ep_rew_mean          | -734        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 173         |\n",
      "|    iterations           | 21          |\n",
      "|    time_elapsed         | 247         |\n",
      "|    total_timesteps      | 43008       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.014405288 |\n",
      "|    clip_fraction        | 0.176       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.49        |\n",
      "|    explained_variance   | 0.948       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.79        |\n",
      "|    n_updates            | 2200        |\n",
      "|    policy_gradient_loss | -0.00723    |\n",
      "|    std                  | 0.0198      |\n",
      "|    value_loss           | 3.71e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 91           |\n",
      "|    ep_rew_mean          | -673         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 172          |\n",
      "|    iterations           | 22           |\n",
      "|    time_elapsed         | 261          |\n",
      "|    total_timesteps      | 45056        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0085405335 |\n",
      "|    clip_fraction        | 0.144        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 2.51         |\n",
      "|    explained_variance   | 0.999        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 11.6         |\n",
      "|    n_updates            | 2210         |\n",
      "|    policy_gradient_loss | 0.0028       |\n",
      "|    std                  | 0.0194       |\n",
      "|    value_loss           | 43.3         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 66.4        |\n",
      "|    ep_rew_mean          | -1.03e+03   |\n",
      "| time/                   |             |\n",
      "|    fps                  | 172         |\n",
      "|    iterations           | 23          |\n",
      "|    time_elapsed         | 272         |\n",
      "|    total_timesteps      | 47104       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.021444134 |\n",
      "|    clip_fraction        | 0.17        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.54        |\n",
      "|    explained_variance   | 1           |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.972       |\n",
      "|    n_updates            | 2220        |\n",
      "|    policy_gradient_loss | -0.00615    |\n",
      "|    std                  | 0.019       |\n",
      "|    value_loss           | 26.5        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 66.4        |\n",
      "|    ep_rew_mean          | -1.03e+03   |\n",
      "| time/                   |             |\n",
      "|    fps                  | 172         |\n",
      "|    iterations           | 24          |\n",
      "|    time_elapsed         | 284         |\n",
      "|    total_timesteps      | 49152       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009352626 |\n",
      "|    clip_fraction        | 0.126       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.55        |\n",
      "|    explained_variance   | 0.889       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 22.3        |\n",
      "|    n_updates            | 2230        |\n",
      "|    policy_gradient_loss | -0.00603    |\n",
      "|    std                  | 0.0189      |\n",
      "|    value_loss           | 1.88e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 58.3         |\n",
      "|    ep_rew_mean          | -1.15e+03    |\n",
      "| time/                   |              |\n",
      "|    fps                  | 172          |\n",
      "|    iterations           | 25           |\n",
      "|    time_elapsed         | 296          |\n",
      "|    total_timesteps      | 51200        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0043982645 |\n",
      "|    clip_fraction        | 0.0833       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 2.55         |\n",
      "|    explained_variance   | 0.995        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.31         |\n",
      "|    n_updates            | 2240         |\n",
      "|    policy_gradient_loss | 0.000244     |\n",
      "|    std                  | 0.0188       |\n",
      "|    value_loss           | 147          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 66.4         |\n",
      "|    ep_rew_mean          | -1.03e+03    |\n",
      "| time/                   |              |\n",
      "|    fps                  | 173          |\n",
      "|    iterations           | 26           |\n",
      "|    time_elapsed         | 306          |\n",
      "|    total_timesteps      | 53248        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0058936165 |\n",
      "|    clip_fraction        | 0.0978       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 2.56         |\n",
      "|    explained_variance   | 0.929        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.4          |\n",
      "|    n_updates            | 2250         |\n",
      "|    policy_gradient_loss | -0.000484    |\n",
      "|    std                  | 0.0186       |\n",
      "|    value_loss           | 7.53e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 74.6       |\n",
      "|    ep_rew_mean          | -913       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 174        |\n",
      "|    iterations           | 27         |\n",
      "|    time_elapsed         | 317        |\n",
      "|    total_timesteps      | 55296      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01182563 |\n",
      "|    clip_fraction        | 0.158      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.57       |\n",
      "|    explained_variance   | 0.997      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 2.76       |\n",
      "|    n_updates            | 2260       |\n",
      "|    policy_gradient_loss | 0.0064     |\n",
      "|    std                  | 0.0183     |\n",
      "|    value_loss           | 212        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 82.8        |\n",
      "|    ep_rew_mean          | -792        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 174         |\n",
      "|    iterations           | 28          |\n",
      "|    time_elapsed         | 328         |\n",
      "|    total_timesteps      | 57344       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011016525 |\n",
      "|    clip_fraction        | 0.132       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.59        |\n",
      "|    explained_variance   | 0.95        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.09        |\n",
      "|    n_updates            | 2270        |\n",
      "|    policy_gradient_loss | -0.000889   |\n",
      "|    std                  | 0.0179      |\n",
      "|    value_loss           | 4.16e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 91          |\n",
      "|    ep_rew_mean          | -671        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 175         |\n",
      "|    iterations           | 29          |\n",
      "|    time_elapsed         | 339         |\n",
      "|    total_timesteps      | 59392       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.013595166 |\n",
      "|    clip_fraction        | 0.2         |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.62        |\n",
      "|    explained_variance   | 0.999       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.17        |\n",
      "|    n_updates            | 2280        |\n",
      "|    policy_gradient_loss | -0.00096    |\n",
      "|    std                  | 0.0175      |\n",
      "|    value_loss           | 38.3        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 99.2        |\n",
      "|    ep_rew_mean          | -550        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 175         |\n",
      "|    iterations           | 30          |\n",
      "|    time_elapsed         | 349         |\n",
      "|    total_timesteps      | 61440       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005199193 |\n",
      "|    clip_fraction        | 0.162       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.64        |\n",
      "|    explained_variance   | 0.998       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 493         |\n",
      "|    n_updates            | 2290        |\n",
      "|    policy_gradient_loss | 0.00161     |\n",
      "|    std                  | 0.0168      |\n",
      "|    value_loss           | 31.6        |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 120        |\n",
      "|    ep_rew_mean          | -248       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 176        |\n",
      "|    iterations           | 31         |\n",
      "|    time_elapsed         | 359        |\n",
      "|    total_timesteps      | 63488      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01116137 |\n",
      "|    clip_fraction        | 0.0898     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.68       |\n",
      "|    explained_variance   | 0.998      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 0.32       |\n",
      "|    n_updates            | 2300       |\n",
      "|    policy_gradient_loss | 0.00738    |\n",
      "|    std                  | 0.0164     |\n",
      "|    value_loss           | 72.5       |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 116         |\n",
      "|    ep_rew_mean          | -308        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 177         |\n",
      "|    iterations           | 32          |\n",
      "|    time_elapsed         | 370         |\n",
      "|    total_timesteps      | 65536       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.014285559 |\n",
      "|    clip_fraction        | 0.13        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.71        |\n",
      "|    explained_variance   | 1           |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.05        |\n",
      "|    n_updates            | 2310        |\n",
      "|    policy_gradient_loss | -0.00159    |\n",
      "|    std                  | 0.0159      |\n",
      "|    value_loss           | 7.25        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 120         |\n",
      "|    ep_rew_mean          | -248        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 177         |\n",
      "|    iterations           | 33          |\n",
      "|    time_elapsed         | 380         |\n",
      "|    total_timesteps      | 67584       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.058304295 |\n",
      "|    clip_fraction        | 0.087       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.73        |\n",
      "|    explained_variance   | 1           |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.416       |\n",
      "|    n_updates            | 2320        |\n",
      "|    policy_gradient_loss | 0.0134      |\n",
      "|    std                  | 0.0157      |\n",
      "|    value_loss           | 66.2        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 128         |\n",
      "|    ep_rew_mean          | -126        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 178         |\n",
      "|    iterations           | 34          |\n",
      "|    time_elapsed         | 390         |\n",
      "|    total_timesteps      | 69632       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010804932 |\n",
      "|    clip_fraction        | 0.128       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.75        |\n",
      "|    explained_variance   | 1           |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.01        |\n",
      "|    n_updates            | 2330        |\n",
      "|    policy_gradient_loss | 0.00656     |\n",
      "|    std                  | 0.0152      |\n",
      "|    value_loss           | 22.4        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 116         |\n",
      "|    ep_rew_mean          | -307        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 178         |\n",
      "|    iterations           | 35          |\n",
      "|    time_elapsed         | 401         |\n",
      "|    total_timesteps      | 71680       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.062455337 |\n",
      "|    clip_fraction        | 0.0994      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.79        |\n",
      "|    explained_variance   | 0.999       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.55        |\n",
      "|    n_updates            | 2340        |\n",
      "|    policy_gradient_loss | -0.00548    |\n",
      "|    std                  | 0.0146      |\n",
      "|    value_loss           | 15.1        |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 128        |\n",
      "|    ep_rew_mean          | -125       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 178        |\n",
      "|    iterations           | 36         |\n",
      "|    time_elapsed         | 412        |\n",
      "|    total_timesteps      | 73728      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02491185 |\n",
      "|    clip_fraction        | 0.0793     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.82       |\n",
      "|    explained_variance   | 0.995      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 125        |\n",
      "|    n_updates            | 2350       |\n",
      "|    policy_gradient_loss | -0.00487   |\n",
      "|    std                  | 0.0143     |\n",
      "|    value_loss           | 98.6       |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 107          |\n",
      "|    ep_rew_mean          | -427         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 178          |\n",
      "|    iterations           | 37           |\n",
      "|    time_elapsed         | 424          |\n",
      "|    total_timesteps      | 75776        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0055865864 |\n",
      "|    clip_fraction        | 0.052        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 2.85         |\n",
      "|    explained_variance   | 0.999        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.3          |\n",
      "|    n_updates            | 2360         |\n",
      "|    policy_gradient_loss | -0.00508     |\n",
      "|    std                  | 0.0138       |\n",
      "|    value_loss           | 33.9         |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 103         |\n",
      "|    ep_rew_mean          | -487        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 178         |\n",
      "|    iterations           | 38          |\n",
      "|    time_elapsed         | 437         |\n",
      "|    total_timesteps      | 77824       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004977862 |\n",
      "|    clip_fraction        | 0.043       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.87        |\n",
      "|    explained_variance   | 0.831       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 117         |\n",
      "|    n_updates            | 2370        |\n",
      "|    policy_gradient_loss | -0.0016     |\n",
      "|    std                  | 0.0136      |\n",
      "|    value_loss           | 2.14e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 82.8        |\n",
      "|    ep_rew_mean          | -789        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 177         |\n",
      "|    iterations           | 39          |\n",
      "|    time_elapsed         | 449         |\n",
      "|    total_timesteps      | 79872       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.023520678 |\n",
      "|    clip_fraction        | 0.0735      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.88        |\n",
      "|    explained_variance   | 0.999       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 6.92        |\n",
      "|    n_updates            | 2380        |\n",
      "|    policy_gradient_loss | -0.00151    |\n",
      "|    std                  | 0.0135      |\n",
      "|    value_loss           | 29.5        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 95.1        |\n",
      "|    ep_rew_mean          | -608        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 174         |\n",
      "|    iterations           | 40          |\n",
      "|    time_elapsed         | 469         |\n",
      "|    total_timesteps      | 81920       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004557796 |\n",
      "|    clip_fraction        | 0.119       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.89        |\n",
      "|    explained_variance   | 1           |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 17.5        |\n",
      "|    n_updates            | 2390        |\n",
      "|    policy_gradient_loss | 0.00262     |\n",
      "|    std                  | 0.0133      |\n",
      "|    value_loss           | 55.6        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 99.2        |\n",
      "|    ep_rew_mean          | -547        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 171         |\n",
      "|    iterations           | 41          |\n",
      "|    time_elapsed         | 488         |\n",
      "|    total_timesteps      | 83968       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.026696816 |\n",
      "|    clip_fraction        | 0.101       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.92        |\n",
      "|    explained_variance   | 1           |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.0412      |\n",
      "|    n_updates            | 2400        |\n",
      "|    policy_gradient_loss | -0.00157    |\n",
      "|    std                  | 0.0128      |\n",
      "|    value_loss           | 10.9        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 116         |\n",
      "|    ep_rew_mean          | -304        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 169         |\n",
      "|    iterations           | 42          |\n",
      "|    time_elapsed         | 508         |\n",
      "|    total_timesteps      | 86016       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009432506 |\n",
      "|    clip_fraction        | 0.124       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.97        |\n",
      "|    explained_variance   | 1           |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.27        |\n",
      "|    n_updates            | 2410        |\n",
      "|    policy_gradient_loss | -0.0006     |\n",
      "|    std                  | 0.0122      |\n",
      "|    value_loss           | 7.77        |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 99.2       |\n",
      "|    ep_rew_mean          | -546       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 168        |\n",
      "|    iterations           | 43         |\n",
      "|    time_elapsed         | 523        |\n",
      "|    total_timesteps      | 88064      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.05561515 |\n",
      "|    clip_fraction        | 0.13       |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 3.01       |\n",
      "|    explained_variance   | 0.999      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 0.798      |\n",
      "|    n_updates            | 2420       |\n",
      "|    policy_gradient_loss | -0.00855   |\n",
      "|    std                  | 0.0117     |\n",
      "|    value_loss           | 46.4       |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 95.1        |\n",
      "|    ep_rew_mean          | -606        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 165         |\n",
      "|    iterations           | 44          |\n",
      "|    time_elapsed         | 542         |\n",
      "|    total_timesteps      | 90112       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.016800746 |\n",
      "|    clip_fraction        | 0.125       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 3.04        |\n",
      "|    explained_variance   | 0.968       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.3         |\n",
      "|    n_updates            | 2430        |\n",
      "|    policy_gradient_loss | 0.00689     |\n",
      "|    std                  | 0.0114      |\n",
      "|    value_loss           | 4.4e+03     |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 50.1        |\n",
      "|    ep_rew_mean          | -1.27e+03   |\n",
      "| time/                   |             |\n",
      "|    fps                  | 164         |\n",
      "|    iterations           | 45          |\n",
      "|    time_elapsed         | 560         |\n",
      "|    total_timesteps      | 92160       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.018114954 |\n",
      "|    clip_fraction        | 0.176       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 3.05        |\n",
      "|    explained_variance   | 0.952       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.09        |\n",
      "|    n_updates            | 2440        |\n",
      "|    policy_gradient_loss | -0.00607    |\n",
      "|    std                  | 0.0114      |\n",
      "|    value_loss           | 4.38e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 46         |\n",
      "|    ep_rew_mean          | -1.33e+03  |\n",
      "| time/                   |            |\n",
      "|    fps                  | 162        |\n",
      "|    iterations           | 46         |\n",
      "|    time_elapsed         | 578        |\n",
      "|    total_timesteps      | 94208      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.08054425 |\n",
      "|    clip_fraction        | 0.171      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 3.05       |\n",
      "|    explained_variance   | 0.96       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 60.3       |\n",
      "|    n_updates            | 2450       |\n",
      "|    policy_gradient_loss | -0.0111    |\n",
      "|    std                  | 0.0114     |\n",
      "|    value_loss           | 8.72e+03   |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 50.1      |\n",
      "|    ep_rew_mean          | -1.27e+03 |\n",
      "| time/                   |           |\n",
      "|    fps                  | 162       |\n",
      "|    iterations           | 47        |\n",
      "|    time_elapsed         | 592       |\n",
      "|    total_timesteps      | 96256     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.0697892 |\n",
      "|    clip_fraction        | 0.147     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 3.06      |\n",
      "|    explained_variance   | 1         |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 12.5      |\n",
      "|    n_updates            | 2460      |\n",
      "|    policy_gradient_loss | 0.00895   |\n",
      "|    std                  | 0.0114    |\n",
      "|    value_loss           | 38.1      |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 66.4        |\n",
      "|    ep_rew_mean          | -1.03e+03   |\n",
      "| time/                   |             |\n",
      "|    fps                  | 163         |\n",
      "|    iterations           | 48          |\n",
      "|    time_elapsed         | 601         |\n",
      "|    total_timesteps      | 98304       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.054528404 |\n",
      "|    clip_fraction        | 0.211       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 3.06        |\n",
      "|    explained_variance   | 1           |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.378       |\n",
      "|    n_updates            | 2470        |\n",
      "|    policy_gradient_loss | 0.0142      |\n",
      "|    std                  | 0.0113      |\n",
      "|    value_loss           | 22          |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 82.8       |\n",
      "|    ep_rew_mean          | -788       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 164        |\n",
      "|    iterations           | 49         |\n",
      "|    time_elapsed         | 611        |\n",
      "|    total_timesteps      | 100352     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02092311 |\n",
      "|    clip_fraction        | 0.202      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 3.06       |\n",
      "|    explained_variance   | 1          |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.95       |\n",
      "|    n_updates            | 2480       |\n",
      "|    policy_gradient_loss | 0.00292    |\n",
      "|    std                  | 0.0114     |\n",
      "|    value_loss           | 9.71       |\n",
      "----------------------------------------\n",
      "--- 614.3314969539642 seconds ---\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.1\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 328      |\n",
      "|    ep_rew_mean     | 2.78e+03 |\n",
      "| time/              |          |\n",
      "|    fps             | 369      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 5        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 369         |\n",
      "|    ep_rew_mean          | 3.37e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 249         |\n",
      "|    iterations           | 2           |\n",
      "|    time_elapsed         | 16          |\n",
      "|    total_timesteps      | 4096        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.041651666 |\n",
      "|    clip_fraction        | 0.133       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.88        |\n",
      "|    explained_variance   | -0.744      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.547       |\n",
      "|    n_updates            | 2010        |\n",
      "|    policy_gradient_loss | 0.0308      |\n",
      "|    std                  | 0.0367      |\n",
      "|    value_loss           | 3.96e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 383         |\n",
      "|    ep_rew_mean          | 3.57e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 227         |\n",
      "|    iterations           | 3           |\n",
      "|    time_elapsed         | 27          |\n",
      "|    total_timesteps      | 6144        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.046865176 |\n",
      "|    clip_fraction        | 0.102       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.91        |\n",
      "|    explained_variance   | -4.4e+03    |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.515       |\n",
      "|    n_updates            | 2020        |\n",
      "|    policy_gradient_loss | -0.00371    |\n",
      "|    std                  | 0.035       |\n",
      "|    value_loss           | 638         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 390         |\n",
      "|    ep_rew_mean          | 3.67e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 225         |\n",
      "|    iterations           | 4           |\n",
      "|    time_elapsed         | 36          |\n",
      "|    total_timesteps      | 8192        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009403409 |\n",
      "|    clip_fraction        | 0.0512      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.96        |\n",
      "|    explained_variance   | -1.07       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.4         |\n",
      "|    n_updates            | 2030        |\n",
      "|    policy_gradient_loss | -0.00683    |\n",
      "|    std                  | 0.0334      |\n",
      "|    value_loss           | 2.8         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 379         |\n",
      "|    ep_rew_mean          | 3.52e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 222         |\n",
      "|    iterations           | 5           |\n",
      "|    time_elapsed         | 45          |\n",
      "|    total_timesteps      | 10240       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.026783623 |\n",
      "|    clip_fraction        | 0.0793      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.01        |\n",
      "|    explained_variance   | -0.817      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.21        |\n",
      "|    n_updates            | 2040        |\n",
      "|    policy_gradient_loss | -0.00441    |\n",
      "|    std                  | 0.0315      |\n",
      "|    value_loss           | 1.73        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 290         |\n",
      "|    ep_rew_mean          | 2.22e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 223         |\n",
      "|    iterations           | 6           |\n",
      "|    time_elapsed         | 54          |\n",
      "|    total_timesteps      | 12288       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.019427888 |\n",
      "|    clip_fraction        | 0.0868      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.07        |\n",
      "|    explained_variance   | 0.00134     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.668       |\n",
      "|    n_updates            | 2050        |\n",
      "|    policy_gradient_loss | -0.00557    |\n",
      "|    std                  | 0.0297      |\n",
      "|    value_loss           | 4.29e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 263         |\n",
      "|    ep_rew_mean          | 1.83e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 229         |\n",
      "|    iterations           | 7           |\n",
      "|    time_elapsed         | 62          |\n",
      "|    total_timesteps      | 14336       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.096022636 |\n",
      "|    clip_fraction        | 0.0647      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.1         |\n",
      "|    explained_variance   | 0.00436     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 17.4        |\n",
      "|    n_updates            | 2060        |\n",
      "|    policy_gradient_loss | 0.0316      |\n",
      "|    std                  | 0.0295      |\n",
      "|    value_loss           | 1.8e+04     |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 246        |\n",
      "|    ep_rew_mean          | 1.58e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 227        |\n",
      "|    iterations           | 8          |\n",
      "|    time_elapsed         | 71         |\n",
      "|    total_timesteps      | 16384      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.11490743 |\n",
      "|    clip_fraction        | 0.0754     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.11       |\n",
      "|    explained_variance   | 0.732      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 6.62e+04   |\n",
      "|    n_updates            | 2070       |\n",
      "|    policy_gradient_loss | 0.0114     |\n",
      "|    std                  | 0.0294     |\n",
      "|    value_loss           | 8.3e+03    |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 258       |\n",
      "|    ep_rew_mean          | 1.76e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 215       |\n",
      "|    iterations           | 9         |\n",
      "|    time_elapsed         | 85        |\n",
      "|    total_timesteps      | 18432     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 17.823017 |\n",
      "|    clip_fraction        | 0.0874    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.12      |\n",
      "|    explained_variance   | 0.457     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 4.21      |\n",
      "|    n_updates            | 2080      |\n",
      "|    policy_gradient_loss | 0.0433    |\n",
      "|    std                  | 0.0288    |\n",
      "|    value_loss           | 1.6e+04   |\n",
      "---------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 268          |\n",
      "|    ep_rew_mean          | 1.91e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 201          |\n",
      "|    iterations           | 10           |\n",
      "|    time_elapsed         | 101          |\n",
      "|    total_timesteps      | 20480        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0075738505 |\n",
      "|    clip_fraction        | 0.0587       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 2.16         |\n",
      "|    explained_variance   | -6.14e+04    |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 0.896        |\n",
      "|    n_updates            | 2090         |\n",
      "|    policy_gradient_loss | -0.00338     |\n",
      "|    std                  | 0.0272       |\n",
      "|    value_loss           | 3.69e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 267         |\n",
      "|    ep_rew_mean          | 1.89e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 195         |\n",
      "|    iterations           | 11          |\n",
      "|    time_elapsed         | 115         |\n",
      "|    total_timesteps      | 22528       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.025016602 |\n",
      "|    clip_fraction        | 0.0875      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.21        |\n",
      "|    explained_variance   | -10.4       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.12        |\n",
      "|    n_updates            | 2100        |\n",
      "|    policy_gradient_loss | -0.00966    |\n",
      "|    std                  | 0.0255      |\n",
      "|    value_loss           | 1.96        |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 275       |\n",
      "|    ep_rew_mean          | 2.01e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 196       |\n",
      "|    iterations           | 12        |\n",
      "|    time_elapsed         | 125       |\n",
      "|    total_timesteps      | 24576     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 14.594121 |\n",
      "|    clip_fraction        | 0.0345    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.26      |\n",
      "|    explained_variance   | 0.0157    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.65      |\n",
      "|    n_updates            | 2110      |\n",
      "|    policy_gradient_loss | 0.0026    |\n",
      "|    std                  | 0.0249    |\n",
      "|    value_loss           | 1.21e+04  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 274       |\n",
      "|    ep_rew_mean          | 2e+03     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 197       |\n",
      "|    iterations           | 13        |\n",
      "|    time_elapsed         | 135       |\n",
      "|    total_timesteps      | 26624     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.7500082 |\n",
      "|    clip_fraction        | 0.112     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.28      |\n",
      "|    explained_variance   | -0.634    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 6.63      |\n",
      "|    n_updates            | 2120      |\n",
      "|    policy_gradient_loss | 0.0199    |\n",
      "|    std                  | 0.0247    |\n",
      "|    value_loss           | 154       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 279       |\n",
      "|    ep_rew_mean          | 2.08e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 194       |\n",
      "|    iterations           | 14        |\n",
      "|    time_elapsed         | 147       |\n",
      "|    total_timesteps      | 28672     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.2398401 |\n",
      "|    clip_fraction        | 0.0768    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.29      |\n",
      "|    explained_variance   | 0.206     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.11      |\n",
      "|    n_updates            | 2130      |\n",
      "|    policy_gradient_loss | 0.0223    |\n",
      "|    std                  | 0.0242    |\n",
      "|    value_loss           | 9.52e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 283      |\n",
      "|    ep_rew_mean          | 2.14e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 195      |\n",
      "|    iterations           | 15       |\n",
      "|    time_elapsed         | 157      |\n",
      "|    total_timesteps      | 30720    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 42.58933 |\n",
      "|    clip_fraction        | 0.12     |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 2.31     |\n",
      "|    explained_variance   | -0.28    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 7.15     |\n",
      "|    n_updates            | 2140     |\n",
      "|    policy_gradient_loss | 0.0821   |\n",
      "|    std                  | 0.024    |\n",
      "|    value_loss           | 120      |\n",
      "--------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 210        |\n",
      "|    ep_rew_mean          | 1.07e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 195        |\n",
      "|    iterations           | 16         |\n",
      "|    time_elapsed         | 167        |\n",
      "|    total_timesteps      | 32768      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.18722717 |\n",
      "|    clip_fraction        | 0.157      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.34       |\n",
      "|    explained_variance   | -0.877     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 0.803      |\n",
      "|    n_updates            | 2150       |\n",
      "|    policy_gradient_loss | -0.00297   |\n",
      "|    std                  | 0.0225     |\n",
      "|    value_loss           | 1.31       |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 173        |\n",
      "|    ep_rew_mean          | 528        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 196        |\n",
      "|    iterations           | 17         |\n",
      "|    time_elapsed         | 177        |\n",
      "|    total_timesteps      | 34816      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.83980274 |\n",
      "|    clip_fraction        | 0.0651     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.38       |\n",
      "|    explained_variance   | 0.00179    |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 4.58       |\n",
      "|    n_updates            | 2160       |\n",
      "|    policy_gradient_loss | 0.0109     |\n",
      "|    std                  | 0.0224     |\n",
      "|    value_loss           | 3.69e+04   |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 74.6      |\n",
      "|    ep_rew_mean          | -926      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 195       |\n",
      "|    iterations           | 18        |\n",
      "|    time_elapsed         | 188       |\n",
      "|    total_timesteps      | 36864     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 7.7328415 |\n",
      "|    clip_fraction        | 0.0907    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.38      |\n",
      "|    explained_variance   | 0.889     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 97.8      |\n",
      "|    n_updates            | 2170      |\n",
      "|    policy_gradient_loss | 0.0273    |\n",
      "|    std                  | 0.0224    |\n",
      "|    value_loss           | 1.18e+04  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 70.5      |\n",
      "|    ep_rew_mean          | -986      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 195       |\n",
      "|    iterations           | 19        |\n",
      "|    time_elapsed         | 198       |\n",
      "|    total_timesteps      | 38912     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.7446357 |\n",
      "|    clip_fraction        | 0.186     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.38      |\n",
      "|    explained_variance   | 0.937     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 90.9      |\n",
      "|    n_updates            | 2180      |\n",
      "|    policy_gradient_loss | -0.0107   |\n",
      "|    std                  | 0.0224    |\n",
      "|    value_loss           | 7.91e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 70.5       |\n",
      "|    ep_rew_mean          | -984       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 196        |\n",
      "|    iterations           | 20         |\n",
      "|    time_elapsed         | 208        |\n",
      "|    total_timesteps      | 40960      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.92817926 |\n",
      "|    clip_fraction        | 0.265      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.38       |\n",
      "|    explained_variance   | 0.878      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.53       |\n",
      "|    n_updates            | 2190       |\n",
      "|    policy_gradient_loss | 0.00862    |\n",
      "|    std                  | 0.0224     |\n",
      "|    value_loss           | 6.83e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 87          |\n",
      "|    ep_rew_mean          | -731        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 196         |\n",
      "|    iterations           | 21          |\n",
      "|    time_elapsed         | 218         |\n",
      "|    total_timesteps      | 43008       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003184972 |\n",
      "|    clip_fraction        | 0.0284      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.38        |\n",
      "|    explained_variance   | 0.88        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 5.44        |\n",
      "|    n_updates            | 2200        |\n",
      "|    policy_gradient_loss | -0.00111    |\n",
      "|    std                  | 0.0223      |\n",
      "|    value_loss           | 1.23e+04    |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 82.9      |\n",
      "|    ep_rew_mean          | -788      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 197       |\n",
      "|    iterations           | 22        |\n",
      "|    time_elapsed         | 228       |\n",
      "|    total_timesteps      | 45056     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.6370878 |\n",
      "|    clip_fraction        | 0.0484    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.38      |\n",
      "|    explained_variance   | 0.768     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 883       |\n",
      "|    n_updates            | 2210      |\n",
      "|    policy_gradient_loss | 0.00775   |\n",
      "|    std                  | 0.0223    |\n",
      "|    value_loss           | 6.85e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 83.2       |\n",
      "|    ep_rew_mean          | -763       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 197        |\n",
      "|    iterations           | 23         |\n",
      "|    time_elapsed         | 237        |\n",
      "|    total_timesteps      | 47104      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.19377017 |\n",
      "|    clip_fraction        | 0.104      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.39       |\n",
      "|    explained_variance   | 0.933      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 110        |\n",
      "|    n_updates            | 2220       |\n",
      "|    policy_gradient_loss | 0.0279     |\n",
      "|    std                  | 0.0222     |\n",
      "|    value_loss           | 9.03e+03   |\n",
      "----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 83.2     |\n",
      "|    ep_rew_mean          | -763     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 198      |\n",
      "|    iterations           | 24       |\n",
      "|    time_elapsed         | 247      |\n",
      "|    total_timesteps      | 49152    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 2.485311 |\n",
      "|    clip_fraction        | 0.0563   |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 2.39     |\n",
      "|    explained_variance   | 0.915    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 3.29e+04 |\n",
      "|    n_updates            | 2230     |\n",
      "|    policy_gradient_loss | -0.00986 |\n",
      "|    std                  | 0.0222   |\n",
      "|    value_loss           | 2.44e+03 |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 91.4     |\n",
      "|    ep_rew_mean          | -644     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 198      |\n",
      "|    iterations           | 25       |\n",
      "|    time_elapsed         | 257      |\n",
      "|    total_timesteps      | 51200    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 285.4392 |\n",
      "|    clip_fraction        | 0.165    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 2.39     |\n",
      "|    explained_variance   | 0.953    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 299      |\n",
      "|    n_updates            | 2240     |\n",
      "|    policy_gradient_loss | 0.012    |\n",
      "|    std                  | 0.0221   |\n",
      "|    value_loss           | 5.32e+03 |\n",
      "--------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 104         |\n",
      "|    ep_rew_mean          | -476        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 199         |\n",
      "|    iterations           | 26          |\n",
      "|    time_elapsed         | 267         |\n",
      "|    total_timesteps      | 53248       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008826156 |\n",
      "|    clip_fraction        | 0.118       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.42        |\n",
      "|    explained_variance   | -0.0687     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 0.466       |\n",
      "|    n_updates            | 2250        |\n",
      "|    policy_gradient_loss | 0.00067     |\n",
      "|    std                  | 0.0211      |\n",
      "|    value_loss           | 1.79e+04    |\n",
      "-----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 112      |\n",
      "|    ep_rew_mean          | -352     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 199      |\n",
      "|    iterations           | 27       |\n",
      "|    time_elapsed         | 276      |\n",
      "|    total_timesteps      | 55296    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 0.442265 |\n",
      "|    clip_fraction        | 0.145    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 2.46     |\n",
      "|    explained_variance   | -0.774   |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.893    |\n",
      "|    n_updates            | 2260     |\n",
      "|    policy_gradient_loss | 0.0335   |\n",
      "|    std                  | 0.0205   |\n",
      "|    value_loss           | 1.16e+04 |\n",
      "--------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 132       |\n",
      "|    ep_rew_mean          | -54       |\n",
      "| time/                   |           |\n",
      "|    fps                  | 200       |\n",
      "|    iterations           | 28        |\n",
      "|    time_elapsed         | 286       |\n",
      "|    total_timesteps      | 57344     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.3543968 |\n",
      "|    clip_fraction        | 0.12      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.48      |\n",
      "|    explained_variance   | 0.364     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 6.35e+03  |\n",
      "|    n_updates            | 2270      |\n",
      "|    policy_gradient_loss | 0.0591    |\n",
      "|    std                  | 0.0202    |\n",
      "|    value_loss           | 6.02e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 144       |\n",
      "|    ep_rew_mean          | 127       |\n",
      "| time/                   |           |\n",
      "|    fps                  | 199       |\n",
      "|    iterations           | 29        |\n",
      "|    time_elapsed         | 298       |\n",
      "|    total_timesteps      | 59392     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 4.5748835 |\n",
      "|    clip_fraction        | 0.0963    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.5       |\n",
      "|    explained_variance   | -0.121    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.478     |\n",
      "|    n_updates            | 2280      |\n",
      "|    policy_gradient_loss | 0.00618   |\n",
      "|    std                  | 0.0194    |\n",
      "|    value_loss           | 3.02e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 165       |\n",
      "|    ep_rew_mean          | 428       |\n",
      "| time/                   |           |\n",
      "|    fps                  | 198       |\n",
      "|    iterations           | 30        |\n",
      "|    time_elapsed         | 309       |\n",
      "|    total_timesteps      | 61440     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.2822318 |\n",
      "|    clip_fraction        | 0.148     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.54      |\n",
      "|    explained_variance   | 0.436     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 15.5      |\n",
      "|    n_updates            | 2290      |\n",
      "|    policy_gradient_loss | 0.0171    |\n",
      "|    std                  | 0.0187    |\n",
      "|    value_loss           | 9.6e+03   |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 152         |\n",
      "|    ep_rew_mean          | 225         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 198         |\n",
      "|    iterations           | 31          |\n",
      "|    time_elapsed         | 320         |\n",
      "|    total_timesteps      | 63488       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005269539 |\n",
      "|    clip_fraction        | 0.0641      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.57        |\n",
      "|    explained_variance   | 0.532       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.75        |\n",
      "|    n_updates            | 2300        |\n",
      "|    policy_gradient_loss | 0.00097     |\n",
      "|    std                  | 0.0182      |\n",
      "|    value_loss           | 1.04e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 173          |\n",
      "|    ep_rew_mean          | 527          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 198          |\n",
      "|    iterations           | 32           |\n",
      "|    time_elapsed         | 329          |\n",
      "|    total_timesteps      | 65536        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0029355749 |\n",
      "|    clip_fraction        | 0.0195       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 2.59         |\n",
      "|    explained_variance   | 0.879        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 819          |\n",
      "|    n_updates            | 2310         |\n",
      "|    policy_gradient_loss | -0.00158     |\n",
      "|    std                  | 0.018        |\n",
      "|    value_loss           | 1.14e+04     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 185        |\n",
      "|    ep_rew_mean          | 706        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 198        |\n",
      "|    iterations           | 33         |\n",
      "|    time_elapsed         | 339        |\n",
      "|    total_timesteps      | 67584      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.11051528 |\n",
      "|    clip_fraction        | 0.0351     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.6        |\n",
      "|    explained_variance   | 0.738      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 0.306      |\n",
      "|    n_updates            | 2320       |\n",
      "|    policy_gradient_loss | 0.00605    |\n",
      "|    std                  | 0.0178     |\n",
      "|    value_loss           | 5.17e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 197       |\n",
      "|    ep_rew_mean          | 889       |\n",
      "| time/                   |           |\n",
      "|    fps                  | 199       |\n",
      "|    iterations           | 34        |\n",
      "|    time_elapsed         | 349       |\n",
      "|    total_timesteps      | 69632     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.5712682 |\n",
      "|    clip_fraction        | 0.0669    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.61      |\n",
      "|    explained_variance   | -0.884    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.845     |\n",
      "|    n_updates            | 2330      |\n",
      "|    policy_gradient_loss | 0.0329    |\n",
      "|    std                  | 0.0178    |\n",
      "|    value_loss           | 6.26e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 193        |\n",
      "|    ep_rew_mean          | 828        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 201        |\n",
      "|    iterations           | 35         |\n",
      "|    time_elapsed         | 356        |\n",
      "|    total_timesteps      | 71680      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.06210746 |\n",
      "|    clip_fraction        | 0.166      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.63       |\n",
      "|    explained_variance   | 0.18       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 0.946      |\n",
      "|    n_updates            | 2340       |\n",
      "|    policy_gradient_loss | 0.0075     |\n",
      "|    std                  | 0.017      |\n",
      "|    value_loss           | 8.12e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 189       |\n",
      "|    ep_rew_mean          | 768       |\n",
      "| time/                   |           |\n",
      "|    fps                  | 203       |\n",
      "|    iterations           | 36        |\n",
      "|    time_elapsed         | 362       |\n",
      "|    total_timesteps      | 73728     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.2214944 |\n",
      "|    clip_fraction        | 0.0798    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.66      |\n",
      "|    explained_variance   | 0.0633    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.76      |\n",
      "|    n_updates            | 2350      |\n",
      "|    policy_gradient_loss | 0.0043    |\n",
      "|    std                  | 0.0169    |\n",
      "|    value_loss           | 7.74e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 136       |\n",
      "|    ep_rew_mean          | 1.15      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 205       |\n",
      "|    iterations           | 37        |\n",
      "|    time_elapsed         | 369       |\n",
      "|    total_timesteps      | 75776     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 11.655151 |\n",
      "|    clip_fraction        | 0.174     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.66      |\n",
      "|    explained_variance   | 0.841     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 4.02      |\n",
      "|    n_updates            | 2360      |\n",
      "|    policy_gradient_loss | 0.0116    |\n",
      "|    std                  | 0.0167    |\n",
      "|    value_loss           | 2.99e+03  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 140        |\n",
      "|    ep_rew_mean          | 61.8       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 206        |\n",
      "|    iterations           | 38         |\n",
      "|    time_elapsed         | 376        |\n",
      "|    total_timesteps      | 77824      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.03369149 |\n",
      "|    clip_fraction        | 0.0806     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.67       |\n",
      "|    explained_variance   | 0.888      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 504        |\n",
      "|    n_updates            | 2370       |\n",
      "|    policy_gradient_loss | 0.000908   |\n",
      "|    std                  | 0.0166     |\n",
      "|    value_loss           | 8.21e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 112       |\n",
      "|    ep_rew_mean          | -363      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 208       |\n",
      "|    iterations           | 39        |\n",
      "|    time_elapsed         | 382       |\n",
      "|    total_timesteps      | 79872     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 4.0034413 |\n",
      "|    clip_fraction        | 0.117     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.69      |\n",
      "|    explained_variance   | 0.979     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 806       |\n",
      "|    n_updates            | 2380      |\n",
      "|    policy_gradient_loss | 0.00824   |\n",
      "|    std                  | 0.0162    |\n",
      "|    value_loss           | 603       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 95.2      |\n",
      "|    ep_rew_mean          | -604      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 210       |\n",
      "|    iterations           | 40        |\n",
      "|    time_elapsed         | 389       |\n",
      "|    total_timesteps      | 81920     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.6497625 |\n",
      "|    clip_fraction        | 0.193     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.71      |\n",
      "|    explained_variance   | 0.886     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 29.4      |\n",
      "|    n_updates            | 2390      |\n",
      "|    policy_gradient_loss | 0.123     |\n",
      "|    std                  | 0.0162    |\n",
      "|    value_loss           | 5.93e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 103       |\n",
      "|    ep_rew_mean          | -483      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 211       |\n",
      "|    iterations           | 41        |\n",
      "|    time_elapsed         | 396       |\n",
      "|    total_timesteps      | 83968     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 16.335226 |\n",
      "|    clip_fraction        | 0.123     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.71      |\n",
      "|    explained_variance   | 0.997     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 205       |\n",
      "|    n_updates            | 2400      |\n",
      "|    policy_gradient_loss | 0.0896    |\n",
      "|    std                  | 0.0161    |\n",
      "|    value_loss           | 72.3      |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 120         |\n",
      "|    ep_rew_mean          | -241        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 213         |\n",
      "|    iterations           | 42          |\n",
      "|    time_elapsed         | 403         |\n",
      "|    total_timesteps      | 86016       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.022509642 |\n",
      "|    clip_fraction        | 0.0684      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 2.74        |\n",
      "|    explained_variance   | -0.466      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.16        |\n",
      "|    n_updates            | 2410        |\n",
      "|    policy_gradient_loss | -0.003      |\n",
      "|    std                  | 0.0152      |\n",
      "|    value_loss           | 6.67e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 140        |\n",
      "|    ep_rew_mean          | 61         |\n",
      "| time/                   |            |\n",
      "|    fps                  | 214        |\n",
      "|    iterations           | 43         |\n",
      "|    time_elapsed         | 410        |\n",
      "|    total_timesteps      | 88064      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.07821562 |\n",
      "|    clip_fraction        | 0.0922     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.81       |\n",
      "|    explained_variance   | -1.15e+04  |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 0.258      |\n",
      "|    n_updates            | 2420       |\n",
      "|    policy_gradient_loss | 0.00624    |\n",
      "|    std                  | 0.0142     |\n",
      "|    value_loss           | 294        |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 157        |\n",
      "|    ep_rew_mean          | 302        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 216        |\n",
      "|    iterations           | 44         |\n",
      "|    time_elapsed         | 416        |\n",
      "|    total_timesteps      | 90112      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.10490927 |\n",
      "|    clip_fraction        | 0.0859     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.85       |\n",
      "|    explained_variance   | -0.956     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 0.606      |\n",
      "|    n_updates            | 2430       |\n",
      "|    policy_gradient_loss | -0.00571   |\n",
      "|    std                  | 0.0138     |\n",
      "|    value_loss           | 1.62       |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 173       |\n",
      "|    ep_rew_mean          | 531       |\n",
      "| time/                   |           |\n",
      "|    fps                  | 217       |\n",
      "|    iterations           | 45        |\n",
      "|    time_elapsed         | 423       |\n",
      "|    total_timesteps      | 92160     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 115.46626 |\n",
      "|    clip_fraction        | 0.211     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.87      |\n",
      "|    explained_variance   | 0.000222  |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.737     |\n",
      "|    n_updates            | 2440      |\n",
      "|    policy_gradient_loss | 0.0684    |\n",
      "|    std                  | 0.0137    |\n",
      "|    value_loss           | 4.34e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 193        |\n",
      "|    ep_rew_mean          | 834        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 218        |\n",
      "|    iterations           | 46         |\n",
      "|    time_elapsed         | 430        |\n",
      "|    total_timesteps      | 94208      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.09355858 |\n",
      "|    clip_fraction        | 0.132      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.88       |\n",
      "|    explained_variance   | 0.0209     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 0.309      |\n",
      "|    n_updates            | 2450       |\n",
      "|    policy_gradient_loss | 0.00712    |\n",
      "|    std                  | 0.0134     |\n",
      "|    value_loss           | 4.31e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 210       |\n",
      "|    ep_rew_mean          | 1.07e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 220       |\n",
      "|    iterations           | 47        |\n",
      "|    time_elapsed         | 436       |\n",
      "|    total_timesteps      | 96256     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.7124404 |\n",
      "|    clip_fraction        | 0.112     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.93      |\n",
      "|    explained_variance   | 0.312     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.531     |\n",
      "|    n_updates            | 2460      |\n",
      "|    policy_gradient_loss | -0.0031   |\n",
      "|    std                  | 0.0126    |\n",
      "|    value_loss           | 1.09      |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 230      |\n",
      "|    ep_rew_mean          | 1.38e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 221      |\n",
      "|    iterations           | 48       |\n",
      "|    time_elapsed         | 443      |\n",
      "|    total_timesteps      | 98304    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 1894.781 |\n",
      "|    clip_fraction        | 0.0904   |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 2.98     |\n",
      "|    explained_variance   | -0.818   |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.181    |\n",
      "|    n_updates            | 2470     |\n",
      "|    policy_gradient_loss | -0.00615 |\n",
      "|    std                  | 0.0121   |\n",
      "|    value_loss           | 4.11     |\n",
      "--------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 234        |\n",
      "|    ep_rew_mean          | 1.44e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 222        |\n",
      "|    iterations           | 49         |\n",
      "|    time_elapsed         | 450        |\n",
      "|    total_timesteps      | 100352     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01668311 |\n",
      "|    clip_fraction        | 0.0434     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 3.01       |\n",
      "|    explained_variance   | -0.438     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 4.12       |\n",
      "|    n_updates            | 2480       |\n",
      "|    policy_gradient_loss | 0.00171    |\n",
      "|    std                  | 0.0118     |\n",
      "|    value_loss           | 3.09       |\n",
      "----------------------------------------\n",
      "--- 452.82445454597473 seconds ---\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.5\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 130      |\n",
      "|    ep_rew_mean     | 535      |\n",
      "| time/              |          |\n",
      "|    fps             | 496      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 4        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 135         |\n",
      "|    ep_rew_mean          | 560         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 372         |\n",
      "|    iterations           | 2           |\n",
      "|    time_elapsed         | 10          |\n",
      "|    total_timesteps      | 4096        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006153426 |\n",
      "|    clip_fraction        | 0.023       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.883       |\n",
      "|    explained_variance   | 0.901       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.16e+03    |\n",
      "|    n_updates            | 2010        |\n",
      "|    policy_gradient_loss | -0.0035     |\n",
      "|    std                  | 0.1         |\n",
      "|    value_loss           | 5e+03       |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 123         |\n",
      "|    ep_rew_mean          | 434         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 343         |\n",
      "|    iterations           | 3           |\n",
      "|    time_elapsed         | 17          |\n",
      "|    total_timesteps      | 6144        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008780757 |\n",
      "|    clip_fraction        | 0.0256      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.883       |\n",
      "|    explained_variance   | 0.936       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.28e+03    |\n",
      "|    n_updates            | 2020        |\n",
      "|    policy_gradient_loss | 0.000575    |\n",
      "|    std                  | 0.1         |\n",
      "|    value_loss           | 6.86e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 122         |\n",
      "|    ep_rew_mean          | 424         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 336         |\n",
      "|    iterations           | 4           |\n",
      "|    time_elapsed         | 24          |\n",
      "|    total_timesteps      | 8192        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.048760924 |\n",
      "|    clip_fraction        | 0.0649      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.883       |\n",
      "|    explained_variance   | 0.978       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.76e+03    |\n",
      "|    n_updates            | 2030        |\n",
      "|    policy_gradient_loss | 0.00739     |\n",
      "|    std                  | 0.1         |\n",
      "|    value_loss           | 4.87e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 129        |\n",
      "|    ep_rew_mean          | 493        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 331        |\n",
      "|    iterations           | 5          |\n",
      "|    time_elapsed         | 30         |\n",
      "|    total_timesteps      | 10240      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.20217168 |\n",
      "|    clip_fraction        | 0.0825     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.883      |\n",
      "|    explained_variance   | 0.992      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 1.09e+03   |\n",
      "|    n_updates            | 2040       |\n",
      "|    policy_gradient_loss | 0.0173     |\n",
      "|    std                  | 0.1        |\n",
      "|    value_loss           | 1.97e+03   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 138          |\n",
      "|    ep_rew_mean          | 602          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 327          |\n",
      "|    iterations           | 6            |\n",
      "|    time_elapsed         | 37           |\n",
      "|    total_timesteps      | 12288        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0078767575 |\n",
      "|    clip_fraction        | 0.0448       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.883        |\n",
      "|    explained_variance   | 0.959        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 823          |\n",
      "|    n_updates            | 2050         |\n",
      "|    policy_gradient_loss | -0.00416     |\n",
      "|    std                  | 0.1          |\n",
      "|    value_loss           | 6.01e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 143        |\n",
      "|    ep_rew_mean          | 667        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 322        |\n",
      "|    iterations           | 7          |\n",
      "|    time_elapsed         | 44         |\n",
      "|    total_timesteps      | 14336      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.37187016 |\n",
      "|    clip_fraction        | 0.106      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.883      |\n",
      "|    explained_variance   | 0.993      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 352        |\n",
      "|    n_updates            | 2060       |\n",
      "|    policy_gradient_loss | 0.0259     |\n",
      "|    std                  | 0.1        |\n",
      "|    value_loss           | 827        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 152         |\n",
      "|    ep_rew_mean          | 765         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 320         |\n",
      "|    iterations           | 8           |\n",
      "|    time_elapsed         | 51          |\n",
      "|    total_timesteps      | 16384       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003527001 |\n",
      "|    clip_fraction        | 0.0264      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.883       |\n",
      "|    explained_variance   | 0.931       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 773         |\n",
      "|    n_updates            | 2070        |\n",
      "|    policy_gradient_loss | -0.000734   |\n",
      "|    std                  | 0.1         |\n",
      "|    value_loss           | 2.99e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 167          |\n",
      "|    ep_rew_mean          | 940          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 318          |\n",
      "|    iterations           | 9            |\n",
      "|    time_elapsed         | 57           |\n",
      "|    total_timesteps      | 18432        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0026698732 |\n",
      "|    clip_fraction        | 0.0546       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.884        |\n",
      "|    explained_variance   | 0.892        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 196          |\n",
      "|    n_updates            | 2080         |\n",
      "|    policy_gradient_loss | 0.00471      |\n",
      "|    std                  | 0.0998       |\n",
      "|    value_loss           | 1.17e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 179         |\n",
      "|    ep_rew_mean          | 1.07e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 317         |\n",
      "|    iterations           | 10          |\n",
      "|    time_elapsed         | 64          |\n",
      "|    total_timesteps      | 20480       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.024577051 |\n",
      "|    clip_fraction        | 0.0476      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.886       |\n",
      "|    explained_variance   | 0.801       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 94.3        |\n",
      "|    n_updates            | 2090        |\n",
      "|    policy_gradient_loss | 0.00219     |\n",
      "|    std                  | 0.0996      |\n",
      "|    value_loss           | 683         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 194        |\n",
      "|    ep_rew_mean          | 1.25e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 317        |\n",
      "|    iterations           | 11         |\n",
      "|    time_elapsed         | 71         |\n",
      "|    total_timesteps      | 22528      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.18395767 |\n",
      "|    clip_fraction        | 0.103      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.894      |\n",
      "|    explained_variance   | 0.21       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 9.18       |\n",
      "|    n_updates            | 2100       |\n",
      "|    policy_gradient_loss | 0.00787    |\n",
      "|    std                  | 0.0983     |\n",
      "|    value_loss           | 90.4       |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 202        |\n",
      "|    ep_rew_mean          | 1.35e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 316        |\n",
      "|    iterations           | 12         |\n",
      "|    time_elapsed         | 77         |\n",
      "|    total_timesteps      | 24576      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.04029999 |\n",
      "|    clip_fraction        | 0.0569     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.906      |\n",
      "|    explained_variance   | 0.839      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 28.1       |\n",
      "|    n_updates            | 2110       |\n",
      "|    policy_gradient_loss | 0.00193    |\n",
      "|    std                  | 0.0973     |\n",
      "|    value_loss           | 231        |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 213       |\n",
      "|    ep_rew_mean          | 1.48e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 315       |\n",
      "|    iterations           | 13        |\n",
      "|    time_elapsed         | 84        |\n",
      "|    total_timesteps      | 26624     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 4.4845953 |\n",
      "|    clip_fraction        | 0.0877    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.918     |\n",
      "|    explained_variance   | 0.991     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 72.8      |\n",
      "|    n_updates            | 2120      |\n",
      "|    policy_gradient_loss | 0.0127    |\n",
      "|    std                  | 0.0959    |\n",
      "|    value_loss           | 142       |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 227         |\n",
      "|    ep_rew_mean          | 1.64e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 314         |\n",
      "|    iterations           | 14          |\n",
      "|    time_elapsed         | 91          |\n",
      "|    total_timesteps      | 28672       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001594343 |\n",
      "|    clip_fraction        | 0.0393      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.931       |\n",
      "|    explained_variance   | 0.885       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 113         |\n",
      "|    n_updates            | 2130        |\n",
      "|    policy_gradient_loss | 0.00602     |\n",
      "|    std                  | 0.095       |\n",
      "|    value_loss           | 975         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 241        |\n",
      "|    ep_rew_mean          | 1.8e+03    |\n",
      "| time/                   |            |\n",
      "|    fps                  | 314        |\n",
      "|    iterations           | 15         |\n",
      "|    time_elapsed         | 97         |\n",
      "|    total_timesteps      | 30720      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.08031851 |\n",
      "|    clip_fraction        | 0.067      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.936      |\n",
      "|    explained_variance   | 0.893      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 341        |\n",
      "|    n_updates            | 2140       |\n",
      "|    policy_gradient_loss | 0.0248     |\n",
      "|    std                  | 0.0949     |\n",
      "|    value_loss           | 5.39e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 257       |\n",
      "|    ep_rew_mean          | 1.98e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 313       |\n",
      "|    iterations           | 16        |\n",
      "|    time_elapsed         | 104       |\n",
      "|    total_timesteps      | 32768     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.3646247 |\n",
      "|    clip_fraction        | 0.0691    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.939     |\n",
      "|    explained_variance   | 0.725     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 315       |\n",
      "|    n_updates            | 2150      |\n",
      "|    policy_gradient_loss | 0.00991   |\n",
      "|    std                  | 0.0944    |\n",
      "|    value_loss           | 5.14e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 259       |\n",
      "|    ep_rew_mean          | 1.91e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 312       |\n",
      "|    iterations           | 17        |\n",
      "|    time_elapsed         | 111       |\n",
      "|    total_timesteps      | 34816     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.1614254 |\n",
      "|    clip_fraction        | 0.233     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.95      |\n",
      "|    explained_variance   | -0.857    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 10.8      |\n",
      "|    n_updates            | 2160      |\n",
      "|    policy_gradient_loss | 0.0411    |\n",
      "|    std                  | 0.0928    |\n",
      "|    value_loss           | 89.9      |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 229         |\n",
      "|    ep_rew_mean          | 1.45e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 311         |\n",
      "|    iterations           | 18          |\n",
      "|    time_elapsed         | 118         |\n",
      "|    total_timesteps      | 36864       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008834675 |\n",
      "|    clip_fraction        | 0.281       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.958       |\n",
      "|    explained_variance   | 0.329       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 5.44e+04    |\n",
      "|    n_updates            | 2170        |\n",
      "|    policy_gradient_loss | 0.0629      |\n",
      "|    std                  | 0.0928      |\n",
      "|    value_loss           | 3.47e+04    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 215         |\n",
      "|    ep_rew_mean          | 1.25e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 311         |\n",
      "|    iterations           | 19          |\n",
      "|    time_elapsed         | 124         |\n",
      "|    total_timesteps      | 38912       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004344494 |\n",
      "|    clip_fraction        | 0.0252      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.958       |\n",
      "|    explained_variance   | 0.82        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 9.91e+03    |\n",
      "|    n_updates            | 2180        |\n",
      "|    policy_gradient_loss | 0.000345    |\n",
      "|    std                  | 0.0928      |\n",
      "|    value_loss           | 2.45e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 169         |\n",
      "|    ep_rew_mean          | 629         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 311         |\n",
      "|    iterations           | 20          |\n",
      "|    time_elapsed         | 131         |\n",
      "|    total_timesteps      | 40960       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.029764857 |\n",
      "|    clip_fraction        | 0.058       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.958       |\n",
      "|    explained_variance   | 0.924       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 367         |\n",
      "|    n_updates            | 2190        |\n",
      "|    policy_gradient_loss | -0.000475   |\n",
      "|    std                  | 0.0929      |\n",
      "|    value_loss           | 4.6e+03     |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 136          |\n",
      "|    ep_rew_mean          | 199          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 311          |\n",
      "|    iterations           | 21           |\n",
      "|    time_elapsed         | 137          |\n",
      "|    total_timesteps      | 43008        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0041083083 |\n",
      "|    clip_fraction        | 0.0633       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.958        |\n",
      "|    explained_variance   | 0.931        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 113          |\n",
      "|    n_updates            | 2200         |\n",
      "|    policy_gradient_loss | 0.000208     |\n",
      "|    std                  | 0.0929       |\n",
      "|    value_loss           | 1.1e+04      |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 107         |\n",
      "|    ep_rew_mean          | -154        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 311         |\n",
      "|    iterations           | 22          |\n",
      "|    time_elapsed         | 144         |\n",
      "|    total_timesteps      | 45056       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.022727575 |\n",
      "|    clip_fraction        | 0.0684      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.958       |\n",
      "|    explained_variance   | 0.963       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 149         |\n",
      "|    n_updates            | 2210        |\n",
      "|    policy_gradient_loss | 0.00429     |\n",
      "|    std                  | 0.0928      |\n",
      "|    value_loss           | 6.54e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 111         |\n",
      "|    ep_rew_mean          | -120        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 311         |\n",
      "|    iterations           | 23          |\n",
      "|    time_elapsed         | 151         |\n",
      "|    total_timesteps      | 47104       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.029716043 |\n",
      "|    clip_fraction        | 0.115       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.958       |\n",
      "|    explained_variance   | 0.944       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.47e+04    |\n",
      "|    n_updates            | 2220        |\n",
      "|    policy_gradient_loss | 0.00301     |\n",
      "|    std                  | 0.0928      |\n",
      "|    value_loss           | 1.57e+04    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 117        |\n",
      "|    ep_rew_mean          | -14.1      |\n",
      "| time/                   |            |\n",
      "|    fps                  | 310        |\n",
      "|    iterations           | 24         |\n",
      "|    time_elapsed         | 158        |\n",
      "|    total_timesteps      | 49152      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02990606 |\n",
      "|    clip_fraction        | 0.145      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.959      |\n",
      "|    explained_variance   | 0.979      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 64.8       |\n",
      "|    n_updates            | 2230       |\n",
      "|    policy_gradient_loss | 0.00666    |\n",
      "|    std                  | 0.0927     |\n",
      "|    value_loss           | 5.05e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 124       |\n",
      "|    ep_rew_mean          | 74.3      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 310       |\n",
      "|    iterations           | 25        |\n",
      "|    time_elapsed         | 164       |\n",
      "|    total_timesteps      | 51200     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.2757532 |\n",
      "|    clip_fraction        | 0.295     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.96      |\n",
      "|    explained_variance   | 0.95      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 495       |\n",
      "|    n_updates            | 2240      |\n",
      "|    policy_gradient_loss | 0.0293    |\n",
      "|    std                  | 0.0926    |\n",
      "|    value_loss           | 8.69e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 137        |\n",
      "|    ep_rew_mean          | 261        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 310        |\n",
      "|    iterations           | 26         |\n",
      "|    time_elapsed         | 171        |\n",
      "|    total_timesteps      | 53248      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.19142699 |\n",
      "|    clip_fraction        | 0.163      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.964      |\n",
      "|    explained_variance   | 0.856      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 248        |\n",
      "|    n_updates            | 2250       |\n",
      "|    policy_gradient_loss | -0.0105    |\n",
      "|    std                  | 0.092      |\n",
      "|    value_loss           | 2.55e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 147       |\n",
      "|    ep_rew_mean          | 382       |\n",
      "| time/                   |           |\n",
      "|    fps                  | 310       |\n",
      "|    iterations           | 27        |\n",
      "|    time_elapsed         | 178       |\n",
      "|    total_timesteps      | 55296     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.2225788 |\n",
      "|    clip_fraction        | 0.127     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.969     |\n",
      "|    explained_variance   | 0.857     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 77.6      |\n",
      "|    n_updates            | 2260      |\n",
      "|    policy_gradient_loss | 0.00168   |\n",
      "|    std                  | 0.0917    |\n",
      "|    value_loss           | 3.97e+03  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 159         |\n",
      "|    ep_rew_mean          | 545         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 310         |\n",
      "|    iterations           | 28          |\n",
      "|    time_elapsed         | 184         |\n",
      "|    total_timesteps      | 57344       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008840963 |\n",
      "|    clip_fraction        | 0.107       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.97        |\n",
      "|    explained_variance   | 0.714       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 39.2        |\n",
      "|    n_updates            | 2270        |\n",
      "|    policy_gradient_loss | 0.0136      |\n",
      "|    std                  | 0.0917      |\n",
      "|    value_loss           | 495         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 158        |\n",
      "|    ep_rew_mean          | 502        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 307        |\n",
      "|    iterations           | 29         |\n",
      "|    time_elapsed         | 193        |\n",
      "|    total_timesteps      | 59392      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.23814078 |\n",
      "|    clip_fraction        | 0.106      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.975      |\n",
      "|    explained_variance   | -1.7       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 148        |\n",
      "|    n_updates            | 2280       |\n",
      "|    policy_gradient_loss | 0.0248     |\n",
      "|    std                  | 0.0908     |\n",
      "|    value_loss           | 2.25e+03   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 152          |\n",
      "|    ep_rew_mean          | 491          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 302          |\n",
      "|    iterations           | 30           |\n",
      "|    time_elapsed         | 203          |\n",
      "|    total_timesteps      | 61440        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0024610176 |\n",
      "|    clip_fraction        | 0.0563       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.979        |\n",
      "|    explained_variance   | 0.589        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.48e+04     |\n",
      "|    n_updates            | 2290         |\n",
      "|    policy_gradient_loss | 0.00293      |\n",
      "|    std                  | 0.091        |\n",
      "|    value_loss           | 2.52e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 139          |\n",
      "|    ep_rew_mean          | 350          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 298          |\n",
      "|    iterations           | 31           |\n",
      "|    time_elapsed         | 212          |\n",
      "|    total_timesteps      | 63488        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0032389574 |\n",
      "|    clip_fraction        | 0.0446       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.978        |\n",
      "|    explained_variance   | 0.93         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 256          |\n",
      "|    n_updates            | 2300         |\n",
      "|    policy_gradient_loss | 0.000918     |\n",
      "|    std                  | 0.091        |\n",
      "|    value_loss           | 7.91e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 110        |\n",
      "|    ep_rew_mean          | -29.4      |\n",
      "| time/                   |            |\n",
      "|    fps                  | 295        |\n",
      "|    iterations           | 32         |\n",
      "|    time_elapsed         | 221        |\n",
      "|    total_timesteps      | 65536      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01524906 |\n",
      "|    clip_fraction        | 0.0993     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.977      |\n",
      "|    explained_variance   | 0.942      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 922        |\n",
      "|    n_updates            | 2310       |\n",
      "|    policy_gradient_loss | 0.00266    |\n",
      "|    std                  | 0.0912     |\n",
      "|    value_loss           | 1.1e+04    |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 87.8       |\n",
      "|    ep_rew_mean          | -276       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 291        |\n",
      "|    iterations           | 33         |\n",
      "|    time_elapsed         | 231        |\n",
      "|    total_timesteps      | 67584      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00516248 |\n",
      "|    clip_fraction        | 0.0556     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.977      |\n",
      "|    explained_variance   | 0.913      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 5.2e+04    |\n",
      "|    n_updates            | 2320       |\n",
      "|    policy_gradient_loss | -0.00252   |\n",
      "|    std                  | 0.091      |\n",
      "|    value_loss           | 2.29e+04   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 95.4       |\n",
      "|    ep_rew_mean          | -181       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 288        |\n",
      "|    iterations           | 34         |\n",
      "|    time_elapsed         | 241        |\n",
      "|    total_timesteps      | 69632      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01294367 |\n",
      "|    clip_fraction        | 0.1        |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.979      |\n",
      "|    explained_variance   | 0.945      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 303        |\n",
      "|    n_updates            | 2330       |\n",
      "|    policy_gradient_loss | 0.00396    |\n",
      "|    std                  | 0.0909     |\n",
      "|    value_loss           | 8.69e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 109       |\n",
      "|    ep_rew_mean          | -59.2     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 286       |\n",
      "|    iterations           | 35        |\n",
      "|    time_elapsed         | 250       |\n",
      "|    total_timesteps      | 71680     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.5752945 |\n",
      "|    clip_fraction        | 0.14      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.982     |\n",
      "|    explained_variance   | 0.967     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 277       |\n",
      "|    n_updates            | 2340      |\n",
      "|    policy_gradient_loss | 0.0153    |\n",
      "|    std                  | 0.0905    |\n",
      "|    value_loss           | 8.69e+03  |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 122         |\n",
      "|    ep_rew_mean          | 74.3        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 283         |\n",
      "|    iterations           | 36          |\n",
      "|    time_elapsed         | 260         |\n",
      "|    total_timesteps      | 73728       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008352396 |\n",
      "|    clip_fraction        | 0.0276      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.985       |\n",
      "|    explained_variance   | 0.958       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.01e+04    |\n",
      "|    n_updates            | 2350        |\n",
      "|    policy_gradient_loss | -0.00235    |\n",
      "|    std                  | 0.0902      |\n",
      "|    value_loss           | 7.12e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 137        |\n",
      "|    ep_rew_mean          | 265        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 280        |\n",
      "|    iterations           | 37         |\n",
      "|    time_elapsed         | 269        |\n",
      "|    total_timesteps      | 75776      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.16687027 |\n",
      "|    clip_fraction        | 0.256      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.988      |\n",
      "|    explained_variance   | 0.988      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 188        |\n",
      "|    n_updates            | 2360       |\n",
      "|    policy_gradient_loss | 0.0766     |\n",
      "|    std                  | 0.0901     |\n",
      "|    value_loss           | 1.37e+03   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 146        |\n",
      "|    ep_rew_mean          | 372        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 278        |\n",
      "|    iterations           | 38         |\n",
      "|    time_elapsed         | 279        |\n",
      "|    total_timesteps      | 77824      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.17712232 |\n",
      "|    clip_fraction        | 0.182      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.993      |\n",
      "|    explained_variance   | 0.981      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 108        |\n",
      "|    n_updates            | 2370       |\n",
      "|    policy_gradient_loss | -0.0121    |\n",
      "|    std                  | 0.0894     |\n",
      "|    value_loss           | 516        |\n",
      "----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 159      |\n",
      "|    ep_rew_mean          | 526      |\n",
      "| time/                   |          |\n",
      "|    fps                  | 276      |\n",
      "|    iterations           | 39       |\n",
      "|    time_elapsed         | 288      |\n",
      "|    total_timesteps      | 79872    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 1.225591 |\n",
      "|    clip_fraction        | 0.181    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.01     |\n",
      "|    explained_variance   | 0.399    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 1.13e+04 |\n",
      "|    n_updates            | 2380     |\n",
      "|    policy_gradient_loss | 0.00929  |\n",
      "|    std                  | 0.0878   |\n",
      "|    value_loss           | 9.29e+03 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 167       |\n",
      "|    ep_rew_mean          | 618       |\n",
      "| time/                   |           |\n",
      "|    fps                  | 274       |\n",
      "|    iterations           | 40        |\n",
      "|    time_elapsed         | 298       |\n",
      "|    total_timesteps      | 81920     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.4635677 |\n",
      "|    clip_fraction        | 0.189     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.01      |\n",
      "|    explained_variance   | 0.934     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 510       |\n",
      "|    n_updates            | 2390      |\n",
      "|    policy_gradient_loss | 0.113     |\n",
      "|    std                  | 0.0877    |\n",
      "|    value_loss           | 7.57e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 180        |\n",
      "|    ep_rew_mean          | 803        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 272        |\n",
      "|    iterations           | 41         |\n",
      "|    time_elapsed         | 307        |\n",
      "|    total_timesteps      | 83968      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.20220993 |\n",
      "|    clip_fraction        | 0.0383     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.02       |\n",
      "|    explained_variance   | 0.764      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 4.5e+04    |\n",
      "|    n_updates            | 2400       |\n",
      "|    policy_gradient_loss | 0.0017     |\n",
      "|    std                  | 0.0873     |\n",
      "|    value_loss           | 1.67e+04   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 196        |\n",
      "|    ep_rew_mean          | 986        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 271        |\n",
      "|    iterations           | 42         |\n",
      "|    time_elapsed         | 317        |\n",
      "|    total_timesteps      | 86016      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.16464794 |\n",
      "|    clip_fraction        | 0.127      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.04       |\n",
      "|    explained_variance   | 0.912      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 75         |\n",
      "|    n_updates            | 2410       |\n",
      "|    policy_gradient_loss | 0.0139     |\n",
      "|    std                  | 0.0843     |\n",
      "|    value_loss           | 3.92e+03   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 210        |\n",
      "|    ep_rew_mean          | 1.16e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 269        |\n",
      "|    iterations           | 43         |\n",
      "|    time_elapsed         | 326        |\n",
      "|    total_timesteps      | 88064      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.35172474 |\n",
      "|    clip_fraction        | 0.119      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.06       |\n",
      "|    explained_variance   | 0.871      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 18.2       |\n",
      "|    n_updates            | 2420       |\n",
      "|    policy_gradient_loss | -0.00545   |\n",
      "|    std                  | 0.0825     |\n",
      "|    value_loss           | 3.76e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 221         |\n",
      "|    ep_rew_mean          | 1.32e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 267         |\n",
      "|    iterations           | 44          |\n",
      "|    time_elapsed         | 336         |\n",
      "|    total_timesteps      | 90112       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.018341076 |\n",
      "|    clip_fraction        | 0.125       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.08        |\n",
      "|    explained_variance   | 0.791       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 20.6        |\n",
      "|    n_updates            | 2430        |\n",
      "|    policy_gradient_loss | 0.041       |\n",
      "|    std                  | 0.0818      |\n",
      "|    value_loss           | 216         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 235          |\n",
      "|    ep_rew_mean          | 1.49e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 266          |\n",
      "|    iterations           | 45           |\n",
      "|    time_elapsed         | 345          |\n",
      "|    total_timesteps      | 92160        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0061844746 |\n",
      "|    clip_fraction        | 0.0538       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.09         |\n",
      "|    explained_variance   | 0.865        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 84.5         |\n",
      "|    n_updates            | 2440         |\n",
      "|    policy_gradient_loss | 0.00585      |\n",
      "|    std                  | 0.0817       |\n",
      "|    value_loss           | 4.33e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 244        |\n",
      "|    ep_rew_mean          | 1.6e+03    |\n",
      "| time/                   |            |\n",
      "|    fps                  | 265        |\n",
      "|    iterations           | 46         |\n",
      "|    time_elapsed         | 355        |\n",
      "|    total_timesteps      | 94208      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.09051638 |\n",
      "|    clip_fraction        | 0.13       |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.09       |\n",
      "|    explained_variance   | 0.726      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 19.9       |\n",
      "|    n_updates            | 2450       |\n",
      "|    policy_gradient_loss | 0.053      |\n",
      "|    std                  | 0.0816     |\n",
      "|    value_loss           | 3.51e+03   |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 253          |\n",
      "|    ep_rew_mean          | 1.72e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 263          |\n",
      "|    iterations           | 47           |\n",
      "|    time_elapsed         | 364          |\n",
      "|    total_timesteps      | 96256        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0071103848 |\n",
      "|    clip_fraction        | 0.0213       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.09         |\n",
      "|    explained_variance   | 0.694        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 142          |\n",
      "|    n_updates            | 2460         |\n",
      "|    policy_gradient_loss | 0.000466     |\n",
      "|    std                  | 0.0815       |\n",
      "|    value_loss           | 9.89e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 256        |\n",
      "|    ep_rew_mean          | 1.76e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 262        |\n",
      "|    iterations           | 48         |\n",
      "|    time_elapsed         | 374        |\n",
      "|    total_timesteps      | 98304      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.10883118 |\n",
      "|    clip_fraction        | 0.111      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.09       |\n",
      "|    explained_variance   | 0.463      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 27         |\n",
      "|    n_updates            | 2470       |\n",
      "|    policy_gradient_loss | 0.00926    |\n",
      "|    std                  | 0.0807     |\n",
      "|    value_loss           | 7.39e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 268         |\n",
      "|    ep_rew_mean          | 1.92e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 261         |\n",
      "|    iterations           | 49          |\n",
      "|    time_elapsed         | 383         |\n",
      "|    total_timesteps      | 100352      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.060101263 |\n",
      "|    clip_fraction        | 0.157       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.11        |\n",
      "|    explained_variance   | 0.484       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.78        |\n",
      "|    n_updates            | 2480        |\n",
      "|    policy_gradient_loss | 0.0168      |\n",
      "|    std                  | 0.0795      |\n",
      "|    value_loss           | 2.73e+03    |\n",
      "-----------------------------------------\n",
      "--- 387.55044174194336 seconds ---\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.9\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 281      |\n",
      "|    ep_rew_mean     | 2.34e+03 |\n",
      "| time/              |          |\n",
      "|    fps             | 348      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 5        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 289       |\n",
      "|    ep_rew_mean          | 2.36e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 267       |\n",
      "|    iterations           | 2         |\n",
      "|    time_elapsed         | 15        |\n",
      "|    total_timesteps      | 4096      |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.0617619 |\n",
      "|    clip_fraction        | 0.0198    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.44      |\n",
      "|    explained_variance   | 0.792     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 803       |\n",
      "|    n_updates            | 2010      |\n",
      "|    policy_gradient_loss | 0.00247   |\n",
      "|    std                  | 0.0571    |\n",
      "|    value_loss           | 1.55e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 323        |\n",
      "|    ep_rew_mean          | 2.77e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 246        |\n",
      "|    iterations           | 3          |\n",
      "|    time_elapsed         | 24         |\n",
      "|    total_timesteps      | 6144       |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.16567966 |\n",
      "|    clip_fraction        | 0.133      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.45       |\n",
      "|    explained_variance   | 0.842      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 12.3       |\n",
      "|    n_updates            | 2020       |\n",
      "|    policy_gradient_loss | 0.011      |\n",
      "|    std                  | 0.0569     |\n",
      "|    value_loss           | 4.22e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 342       |\n",
      "|    ep_rew_mean          | 3e+03     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 237       |\n",
      "|    iterations           | 4         |\n",
      "|    time_elapsed         | 34        |\n",
      "|    total_timesteps      | 8192      |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.4636881 |\n",
      "|    clip_fraction        | 0.0491    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.45      |\n",
      "|    explained_variance   | 0.566     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 22.6      |\n",
      "|    n_updates            | 2030      |\n",
      "|    policy_gradient_loss | 0.0151    |\n",
      "|    std                  | 0.0568    |\n",
      "|    value_loss           | 318       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 342       |\n",
      "|    ep_rew_mean          | 3.02e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 233       |\n",
      "|    iterations           | 5         |\n",
      "|    time_elapsed         | 43        |\n",
      "|    total_timesteps      | 10240     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.4632023 |\n",
      "|    clip_fraction        | 0.0883    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.45      |\n",
      "|    explained_variance   | 0.447     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 8.22      |\n",
      "|    n_updates            | 2040      |\n",
      "|    policy_gradient_loss | 0.0231    |\n",
      "|    std                  | 0.0565    |\n",
      "|    value_loss           | 78.2      |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 333      |\n",
      "|    ep_rew_mean          | 2.89e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 230      |\n",
      "|    iterations           | 6        |\n",
      "|    time_elapsed         | 53       |\n",
      "|    total_timesteps      | 12288    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 4.414893 |\n",
      "|    clip_fraction        | 0.0913   |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.46     |\n",
      "|    explained_variance   | 0.916    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 132      |\n",
      "|    n_updates            | 2050     |\n",
      "|    policy_gradient_loss | 0.0469   |\n",
      "|    std                  | 0.0563   |\n",
      "|    value_loss           | 1.28e+03 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 335       |\n",
      "|    ep_rew_mean          | 2.92e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 227       |\n",
      "|    iterations           | 7         |\n",
      "|    time_elapsed         | 62        |\n",
      "|    total_timesteps      | 14336     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.5262789 |\n",
      "|    clip_fraction        | 0.115     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.46      |\n",
      "|    explained_variance   | 0.723     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 256       |\n",
      "|    n_updates            | 2060      |\n",
      "|    policy_gradient_loss | 0.0126    |\n",
      "|    std                  | 0.0557    |\n",
      "|    value_loss           | 3.82e+03  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 343      |\n",
      "|    ep_rew_mean          | 3.01e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 226      |\n",
      "|    iterations           | 8        |\n",
      "|    time_elapsed         | 72       |\n",
      "|    total_timesteps      | 16384    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 0.15928  |\n",
      "|    clip_fraction        | 0.0928   |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.47     |\n",
      "|    explained_variance   | 0.973    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 4.7      |\n",
      "|    n_updates            | 2070     |\n",
      "|    policy_gradient_loss | 0.0533   |\n",
      "|    std                  | 0.0555   |\n",
      "|    value_loss           | 328      |\n",
      "--------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 349         |\n",
      "|    ep_rew_mean          | 3.09e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 224         |\n",
      "|    iterations           | 9           |\n",
      "|    time_elapsed         | 81          |\n",
      "|    total_timesteps      | 18432       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.066713646 |\n",
      "|    clip_fraction        | 0.0456      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.47        |\n",
      "|    explained_variance   | 0.29        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 125         |\n",
      "|    n_updates            | 2080        |\n",
      "|    policy_gradient_loss | 0.0197      |\n",
      "|    std                  | 0.0555      |\n",
      "|    value_loss           | 512         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 355         |\n",
      "|    ep_rew_mean          | 3.16e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 223         |\n",
      "|    iterations           | 10          |\n",
      "|    time_elapsed         | 91          |\n",
      "|    total_timesteps      | 20480       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.029006243 |\n",
      "|    clip_fraction        | 0.0768      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.48        |\n",
      "|    explained_variance   | -0.0088     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 7.83        |\n",
      "|    n_updates            | 2090        |\n",
      "|    policy_gradient_loss | 0.0266      |\n",
      "|    std                  | 0.0548      |\n",
      "|    value_loss           | 175         |\n",
      "-----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 359      |\n",
      "|    ep_rew_mean          | 3.22e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 222      |\n",
      "|    iterations           | 11       |\n",
      "|    time_elapsed         | 101      |\n",
      "|    total_timesteps      | 22528    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 8.41559  |\n",
      "|    clip_fraction        | 0.226    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.51     |\n",
      "|    explained_variance   | 0.656    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 3.17     |\n",
      "|    n_updates            | 2100     |\n",
      "|    policy_gradient_loss | 0.0263   |\n",
      "|    std                  | 0.0523   |\n",
      "|    value_loss           | 34.3     |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 363       |\n",
      "|    ep_rew_mean          | 3.27e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 222       |\n",
      "|    iterations           | 12        |\n",
      "|    time_elapsed         | 110       |\n",
      "|    total_timesteps      | 24576     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.3040607 |\n",
      "|    clip_fraction        | 0.294     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.55      |\n",
      "|    explained_variance   | 0.404     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 7.39      |\n",
      "|    n_updates            | 2110      |\n",
      "|    policy_gradient_loss | 0.0434    |\n",
      "|    std                  | 0.0509    |\n",
      "|    value_loss           | 14.8      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 361       |\n",
      "|    ep_rew_mean          | 3.23e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 221       |\n",
      "|    iterations           | 13        |\n",
      "|    time_elapsed         | 120       |\n",
      "|    total_timesteps      | 26624     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 5.7107477 |\n",
      "|    clip_fraction        | 0.531     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.57      |\n",
      "|    explained_variance   | 0.00988   |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.69      |\n",
      "|    n_updates            | 2120      |\n",
      "|    policy_gradient_loss | 0.0635    |\n",
      "|    std                  | 0.0495    |\n",
      "|    value_loss           | 9.49      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 364       |\n",
      "|    ep_rew_mean          | 3.27e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 220       |\n",
      "|    iterations           | 14        |\n",
      "|    time_elapsed         | 130       |\n",
      "|    total_timesteps      | 28672     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 260.82184 |\n",
      "|    clip_fraction        | 0.306     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.6       |\n",
      "|    explained_variance   | 0.0528    |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 6.9       |\n",
      "|    n_updates            | 2130      |\n",
      "|    policy_gradient_loss | 0.0364    |\n",
      "|    std                  | 0.0485    |\n",
      "|    value_loss           | 3.42e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 367       |\n",
      "|    ep_rew_mean          | 3.31e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 219       |\n",
      "|    iterations           | 15        |\n",
      "|    time_elapsed         | 139       |\n",
      "|    total_timesteps      | 30720     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 20.565735 |\n",
      "|    clip_fraction        | 0.149     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.63      |\n",
      "|    explained_variance   | -36.7     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 4.49      |\n",
      "|    n_updates            | 2140      |\n",
      "|    policy_gradient_loss | 0.0619    |\n",
      "|    std                  | 0.046     |\n",
      "|    value_loss           | 288       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 369       |\n",
      "|    ep_rew_mean          | 3.34e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 218       |\n",
      "|    iterations           | 16        |\n",
      "|    time_elapsed         | 149       |\n",
      "|    total_timesteps      | 32768     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.3397973 |\n",
      "|    clip_fraction        | 0.171     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.68      |\n",
      "|    explained_variance   | 0.969     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 13.5      |\n",
      "|    n_updates            | 2150      |\n",
      "|    policy_gradient_loss | 0.0468    |\n",
      "|    std                  | 0.0446    |\n",
      "|    value_loss           | 7.92      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 372       |\n",
      "|    ep_rew_mean          | 3.36e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 218       |\n",
      "|    iterations           | 17        |\n",
      "|    time_elapsed         | 159       |\n",
      "|    total_timesteps      | 34816     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.2381827 |\n",
      "|    clip_fraction        | 0.262     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.7       |\n",
      "|    explained_variance   | 0.93      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.6       |\n",
      "|    n_updates            | 2160      |\n",
      "|    policy_gradient_loss | 0.0789    |\n",
      "|    std                  | 0.0438    |\n",
      "|    value_loss           | 9.88      |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 370       |\n",
      "|    ep_rew_mean          | 3.33e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 218       |\n",
      "|    iterations           | 18        |\n",
      "|    time_elapsed         | 168       |\n",
      "|    total_timesteps      | 36864     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.8670678 |\n",
      "|    clip_fraction        | 0.148     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.71      |\n",
      "|    explained_variance   | 0.917     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 27.7      |\n",
      "|    n_updates            | 2170      |\n",
      "|    policy_gradient_loss | 0.106     |\n",
      "|    std                  | 0.0432    |\n",
      "|    value_loss           | 38.4      |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 374      |\n",
      "|    ep_rew_mean          | 3.38e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 218      |\n",
      "|    iterations           | 19       |\n",
      "|    time_elapsed         | 178      |\n",
      "|    total_timesteps      | 38912    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 2.122447 |\n",
      "|    clip_fraction        | 0.142    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.73     |\n",
      "|    explained_variance   | 0.126    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 2.07     |\n",
      "|    n_updates            | 2180     |\n",
      "|    policy_gradient_loss | 0.0773   |\n",
      "|    std                  | 0.0424   |\n",
      "|    value_loss           | 2.92e+03 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 378       |\n",
      "|    ep_rew_mean          | 3.43e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 218       |\n",
      "|    iterations           | 20        |\n",
      "|    time_elapsed         | 187       |\n",
      "|    total_timesteps      | 40960     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.4964012 |\n",
      "|    clip_fraction        | 0.22      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.75      |\n",
      "|    explained_variance   | 0.975     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.18      |\n",
      "|    n_updates            | 2190      |\n",
      "|    policy_gradient_loss | 0.105     |\n",
      "|    std                  | 0.0419    |\n",
      "|    value_loss           | 18.7      |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 386      |\n",
      "|    ep_rew_mean          | 3.53e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 218      |\n",
      "|    iterations           | 21       |\n",
      "|    time_elapsed         | 197      |\n",
      "|    total_timesteps      | 43008    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 33.37827 |\n",
      "|    clip_fraction        | 0.244    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.78     |\n",
      "|    explained_variance   | 0.769    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 1.34     |\n",
      "|    n_updates            | 2200     |\n",
      "|    policy_gradient_loss | 0.0732   |\n",
      "|    std                  | 0.0397   |\n",
      "|    value_loss           | 6.47     |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 386      |\n",
      "|    ep_rew_mean          | 3.53e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 217      |\n",
      "|    iterations           | 22       |\n",
      "|    time_elapsed         | 206      |\n",
      "|    total_timesteps      | 45056    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 2.579321 |\n",
      "|    clip_fraction        | 0.09     |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.81     |\n",
      "|    explained_variance   | 0.978    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 10.1     |\n",
      "|    n_updates            | 2210     |\n",
      "|    policy_gradient_loss | 0.0848   |\n",
      "|    std                  | 0.0393   |\n",
      "|    value_loss           | 18.5     |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 386       |\n",
      "|    ep_rew_mean          | 3.53e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 217       |\n",
      "|    iterations           | 23        |\n",
      "|    time_elapsed         | 216       |\n",
      "|    total_timesteps      | 47104     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.3967321 |\n",
      "|    clip_fraction        | 0.149     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.82      |\n",
      "|    explained_variance   | -3.62     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.8       |\n",
      "|    n_updates            | 2220      |\n",
      "|    policy_gradient_loss | 0.0387    |\n",
      "|    std                  | 0.0389    |\n",
      "|    value_loss           | 534       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 386       |\n",
      "|    ep_rew_mean          | 3.52e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 217       |\n",
      "|    iterations           | 24        |\n",
      "|    time_elapsed         | 225       |\n",
      "|    total_timesteps      | 49152     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 16.831429 |\n",
      "|    clip_fraction        | 0.273     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.83      |\n",
      "|    explained_variance   | 0.976     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.19      |\n",
      "|    n_updates            | 2230      |\n",
      "|    policy_gradient_loss | 0.00871   |\n",
      "|    std                  | 0.0384    |\n",
      "|    value_loss           | 11.6      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 390       |\n",
      "|    ep_rew_mean          | 3.56e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 217       |\n",
      "|    iterations           | 25        |\n",
      "|    time_elapsed         | 235       |\n",
      "|    total_timesteps      | 51200     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.3907018 |\n",
      "|    clip_fraction        | 0.188     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.84      |\n",
      "|    explained_variance   | 0.221     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.95      |\n",
      "|    n_updates            | 2240      |\n",
      "|    policy_gradient_loss | 0.0845    |\n",
      "|    std                  | 0.0382    |\n",
      "|    value_loss           | 1.64e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 394       |\n",
      "|    ep_rew_mean          | 3.6e+03   |\n",
      "| time/                   |           |\n",
      "|    fps                  | 217       |\n",
      "|    iterations           | 26        |\n",
      "|    time_elapsed         | 244       |\n",
      "|    total_timesteps      | 53248     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 11.517178 |\n",
      "|    clip_fraction        | 0.146     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.85      |\n",
      "|    explained_variance   | 0.72      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.7       |\n",
      "|    n_updates            | 2250      |\n",
      "|    policy_gradient_loss | 0.0955    |\n",
      "|    std                  | 0.0377    |\n",
      "|    value_loss           | 30.5      |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 394      |\n",
      "|    ep_rew_mean          | 3.61e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 217      |\n",
      "|    iterations           | 27       |\n",
      "|    time_elapsed         | 253      |\n",
      "|    total_timesteps      | 55296    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 3.86615  |\n",
      "|    clip_fraction        | 0.0962   |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.86     |\n",
      "|    explained_variance   | 0.107    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 1.84     |\n",
      "|    n_updates            | 2260     |\n",
      "|    policy_gradient_loss | 0.0597   |\n",
      "|    std                  | 0.0373   |\n",
      "|    value_loss           | 3.69e+03 |\n",
      "--------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 394       |\n",
      "|    ep_rew_mean          | 3.61e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 220       |\n",
      "|    iterations           | 28        |\n",
      "|    time_elapsed         | 260       |\n",
      "|    total_timesteps      | 57344     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 21.830866 |\n",
      "|    clip_fraction        | 0.077     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.88      |\n",
      "|    explained_variance   | 0.668     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 6.41      |\n",
      "|    n_updates            | 2270      |\n",
      "|    policy_gradient_loss | 0.0351    |\n",
      "|    std                  | 0.0367    |\n",
      "|    value_loss           | 43.7      |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 394      |\n",
      "|    ep_rew_mean          | 3.61e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 222      |\n",
      "|    iterations           | 29       |\n",
      "|    time_elapsed         | 267      |\n",
      "|    total_timesteps      | 59392    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 7.164011 |\n",
      "|    clip_fraction        | 0.314    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.89     |\n",
      "|    explained_variance   | 0.997    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 3.92     |\n",
      "|    n_updates            | 2280     |\n",
      "|    policy_gradient_loss | 0.093    |\n",
      "|    std                  | 0.0362   |\n",
      "|    value_loss           | 22.9     |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 394       |\n",
      "|    ep_rew_mean          | 3.61e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 224       |\n",
      "|    iterations           | 30        |\n",
      "|    time_elapsed         | 273       |\n",
      "|    total_timesteps      | 61440     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 6.1203337 |\n",
      "|    clip_fraction        | 0.2       |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.94      |\n",
      "|    explained_variance   | 0.916     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.311     |\n",
      "|    n_updates            | 2290      |\n",
      "|    policy_gradient_loss | 0.00593   |\n",
      "|    std                  | 0.0337    |\n",
      "|    value_loss           | 3.63      |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 394      |\n",
      "|    ep_rew_mean          | 3.61e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 226      |\n",
      "|    iterations           | 31       |\n",
      "|    time_elapsed         | 280      |\n",
      "|    total_timesteps      | 63488    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 2.810657 |\n",
      "|    clip_fraction        | 0.167    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 1.99     |\n",
      "|    explained_variance   | -2.07    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.536    |\n",
      "|    n_updates            | 2300     |\n",
      "|    policy_gradient_loss | 0.0665   |\n",
      "|    std                  | 0.0328   |\n",
      "|    value_loss           | 120      |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 394       |\n",
      "|    ep_rew_mean          | 3.61e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 228       |\n",
      "|    iterations           | 32        |\n",
      "|    time_elapsed         | 287       |\n",
      "|    total_timesteps      | 65536     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3.8562193 |\n",
      "|    clip_fraction        | 0.241     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.01      |\n",
      "|    explained_variance   | 0.975     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 7.56      |\n",
      "|    n_updates            | 2310      |\n",
      "|    policy_gradient_loss | 0.0203    |\n",
      "|    std                  | 0.0318    |\n",
      "|    value_loss           | 6.93      |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 398      |\n",
      "|    ep_rew_mean          | 3.67e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 230      |\n",
      "|    iterations           | 33       |\n",
      "|    time_elapsed         | 293      |\n",
      "|    total_timesteps      | 67584    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 9.398561 |\n",
      "|    clip_fraction        | 0.205    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 2.06     |\n",
      "|    explained_variance   | 0.759    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 2.04     |\n",
      "|    n_updates            | 2320     |\n",
      "|    policy_gradient_loss | 0.0103   |\n",
      "|    std                  | 0.0302   |\n",
      "|    value_loss           | 4.24     |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 398       |\n",
      "|    ep_rew_mean          | 3.67e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 231       |\n",
      "|    iterations           | 34        |\n",
      "|    time_elapsed         | 300       |\n",
      "|    total_timesteps      | 69632     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 249.65172 |\n",
      "|    clip_fraction        | 0.209     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.09      |\n",
      "|    explained_variance   | 0.957     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.274     |\n",
      "|    n_updates            | 2330      |\n",
      "|    policy_gradient_loss | 0.118     |\n",
      "|    std                  | 0.0297    |\n",
      "|    value_loss           | 14.7      |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 398      |\n",
      "|    ep_rew_mean          | 3.67e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 232      |\n",
      "|    iterations           | 35       |\n",
      "|    time_elapsed         | 308      |\n",
      "|    total_timesteps      | 71680    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 8.961897 |\n",
      "|    clip_fraction        | 0.172    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 2.1      |\n",
      "|    explained_variance   | 0.99     |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 6.41     |\n",
      "|    n_updates            | 2340     |\n",
      "|    policy_gradient_loss | 0.148    |\n",
      "|    std                  | 0.0295   |\n",
      "|    value_loss           | 13.9     |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 398      |\n",
      "|    ep_rew_mean          | 3.68e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 230      |\n",
      "|    iterations           | 36       |\n",
      "|    time_elapsed         | 319      |\n",
      "|    total_timesteps      | 73728    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 72.67435 |\n",
      "|    clip_fraction        | 0.324    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 2.11     |\n",
      "|    explained_variance   | 0.985    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 43.6     |\n",
      "|    n_updates            | 2350     |\n",
      "|    policy_gradient_loss | 0.237    |\n",
      "|    std                  | 0.0294   |\n",
      "|    value_loss           | 11       |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 402      |\n",
      "|    ep_rew_mean          | 3.74e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 230      |\n",
      "|    iterations           | 37       |\n",
      "|    time_elapsed         | 328      |\n",
      "|    total_timesteps      | 75776    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 53.64248 |\n",
      "|    clip_fraction        | 0.234    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 2.11     |\n",
      "|    explained_variance   | 0.944    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 0.828    |\n",
      "|    n_updates            | 2360     |\n",
      "|    policy_gradient_loss | 0.137    |\n",
      "|    std                  | 0.0291   |\n",
      "|    value_loss           | 6.88     |\n",
      "--------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 402       |\n",
      "|    ep_rew_mean          | 3.75e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 229       |\n",
      "|    iterations           | 38        |\n",
      "|    time_elapsed         | 338       |\n",
      "|    total_timesteps      | 77824     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 11.147435 |\n",
      "|    clip_fraction        | 0.381     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.13      |\n",
      "|    explained_variance   | 0.976     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.44      |\n",
      "|    n_updates            | 2370      |\n",
      "|    policy_gradient_loss | 0.152     |\n",
      "|    std                  | 0.0287    |\n",
      "|    value_loss           | 8.7       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 402       |\n",
      "|    ep_rew_mean          | 3.75e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 229       |\n",
      "|    iterations           | 39        |\n",
      "|    time_elapsed         | 348       |\n",
      "|    total_timesteps      | 79872     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.5319796 |\n",
      "|    clip_fraction        | 0.302     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.15      |\n",
      "|    explained_variance   | 0.907     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.32      |\n",
      "|    n_updates            | 2380      |\n",
      "|    policy_gradient_loss | 0.109     |\n",
      "|    std                  | 0.0276    |\n",
      "|    value_loss           | 3.33      |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 402        |\n",
      "|    ep_rew_mean          | 3.76e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 228        |\n",
      "|    iterations           | 40         |\n",
      "|    time_elapsed         | 358        |\n",
      "|    total_timesteps      | 81920      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.17509487 |\n",
      "|    clip_fraction        | 0.136      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.18       |\n",
      "|    explained_variance   | 0.976      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 2.14       |\n",
      "|    n_updates            | 2390       |\n",
      "|    policy_gradient_loss | 0.0738     |\n",
      "|    std                  | 0.0271     |\n",
      "|    value_loss           | 7.89       |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 402        |\n",
      "|    ep_rew_mean          | 3.76e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 228        |\n",
      "|    iterations           | 41         |\n",
      "|    time_elapsed         | 367        |\n",
      "|    total_timesteps      | 83968      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.55133235 |\n",
      "|    clip_fraction        | 0.162      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 2.2        |\n",
      "|    explained_variance   | 0.98       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 4.45       |\n",
      "|    n_updates            | 2400       |\n",
      "|    policy_gradient_loss | 0.0722     |\n",
      "|    std                  | 0.0266     |\n",
      "|    value_loss           | 9.67       |\n",
      "----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 402      |\n",
      "|    ep_rew_mean          | 3.77e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 227      |\n",
      "|    iterations           | 42       |\n",
      "|    time_elapsed         | 377      |\n",
      "|    total_timesteps      | 86016    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 379.5835 |\n",
      "|    clip_fraction        | 0.116    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 2.24     |\n",
      "|    explained_variance   | 0.971    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 1.37     |\n",
      "|    n_updates            | 2410     |\n",
      "|    policy_gradient_loss | 0.00542  |\n",
      "|    std                  | 0.0249   |\n",
      "|    value_loss           | 3.6      |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 402       |\n",
      "|    ep_rew_mean          | 3.77e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 227       |\n",
      "|    iterations           | 43        |\n",
      "|    time_elapsed         | 387       |\n",
      "|    total_timesteps      | 88064     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.6390144 |\n",
      "|    clip_fraction        | 0.291     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.28      |\n",
      "|    explained_variance   | 0.983     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.04      |\n",
      "|    n_updates            | 2420      |\n",
      "|    policy_gradient_loss | 0.137     |\n",
      "|    std                  | 0.0244    |\n",
      "|    value_loss           | 9.53      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 406       |\n",
      "|    ep_rew_mean          | 3.84e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 227       |\n",
      "|    iterations           | 44        |\n",
      "|    time_elapsed         | 396       |\n",
      "|    total_timesteps      | 90112     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 183.74478 |\n",
      "|    clip_fraction        | 0.116     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.32      |\n",
      "|    explained_variance   | 0.929     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.533     |\n",
      "|    n_updates            | 2430      |\n",
      "|    policy_gradient_loss | 0.0029    |\n",
      "|    std                  | 0.0231    |\n",
      "|    value_loss           | 2.59      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 406       |\n",
      "|    ep_rew_mean          | 3.84e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 226       |\n",
      "|    iterations           | 45        |\n",
      "|    time_elapsed         | 406       |\n",
      "|    total_timesteps      | 92160     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 5.0165787 |\n",
      "|    clip_fraction        | 0.26      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.36      |\n",
      "|    explained_variance   | 0.974     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 0.653     |\n",
      "|    n_updates            | 2440      |\n",
      "|    policy_gradient_loss | 0.0272    |\n",
      "|    std                  | 0.0227    |\n",
      "|    value_loss           | 6.77      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 410       |\n",
      "|    ep_rew_mean          | 3.91e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 226       |\n",
      "|    iterations           | 46        |\n",
      "|    time_elapsed         | 416       |\n",
      "|    total_timesteps      | 94208     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 10.226564 |\n",
      "|    clip_fraction        | 0.284     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.37      |\n",
      "|    explained_variance   | 0.946     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 2.59      |\n",
      "|    n_updates            | 2450      |\n",
      "|    policy_gradient_loss | 0.0114    |\n",
      "|    std                  | 0.0225    |\n",
      "|    value_loss           | 6.13      |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 410       |\n",
      "|    ep_rew_mean          | 3.91e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 226       |\n",
      "|    iterations           | 47        |\n",
      "|    time_elapsed         | 425       |\n",
      "|    total_timesteps      | 96256     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 11.455383 |\n",
      "|    clip_fraction        | 0.165     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.39      |\n",
      "|    explained_variance   | 0.989     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 6.42      |\n",
      "|    n_updates            | 2460      |\n",
      "|    policy_gradient_loss | 0.00511   |\n",
      "|    std                  | 0.022     |\n",
      "|    value_loss           | 18.9      |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 410       |\n",
      "|    ep_rew_mean          | 3.92e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 225       |\n",
      "|    iterations           | 48        |\n",
      "|    time_elapsed         | 435       |\n",
      "|    total_timesteps      | 98304     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.4430245 |\n",
      "|    clip_fraction        | 0.244     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 2.4       |\n",
      "|    explained_variance   | 0.96      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 7.18      |\n",
      "|    n_updates            | 2470      |\n",
      "|    policy_gradient_loss | 0.0575    |\n",
      "|    std                  | 0.0218    |\n",
      "|    value_loss           | 4.51      |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 410      |\n",
      "|    ep_rew_mean          | 3.92e+03 |\n",
      "| time/                   |          |\n",
      "|    fps                  | 225      |\n",
      "|    iterations           | 49       |\n",
      "|    time_elapsed         | 444      |\n",
      "|    total_timesteps      | 100352   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 897.6621 |\n",
      "|    clip_fraction        | 0.171    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 2.42     |\n",
      "|    explained_variance   | 0.979    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 5.9      |\n",
      "|    n_updates            | 2480     |\n",
      "|    policy_gradient_loss | 0.117    |\n",
      "|    std                  | 0.0215   |\n",
      "|    value_loss           | 11.2     |\n",
      "--------------------------------------\n",
      "--- 448.43996953964233 seconds ---\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.95\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 16.4     |\n",
      "|    ep_rew_mean     | -807     |\n",
      "| time/              |          |\n",
      "|    fps             | 359      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 5        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.3      |\n",
      "|    ep_rew_mean          | -808      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 264       |\n",
      "|    iterations           | 2         |\n",
      "|    time_elapsed         | 15        |\n",
      "|    total_timesteps      | 4096      |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 169.60493 |\n",
      "|    clip_fraction        | 0.773     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.69      |\n",
      "|    explained_variance   | 0.56      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.74e+03  |\n",
      "|    n_updates            | 2010      |\n",
      "|    policy_gradient_loss | 0.124     |\n",
      "|    std                  | 0.122     |\n",
      "|    value_loss           | 1.91e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 14.1     |\n",
      "|    ep_rew_mean          | -859     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 248      |\n",
      "|    iterations           | 3        |\n",
      "|    time_elapsed         | 24       |\n",
      "|    total_timesteps      | 6144     |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 6.659561 |\n",
      "|    clip_fraction        | 0.609    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.687    |\n",
      "|    explained_variance   | 0.745    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 19.9     |\n",
      "|    n_updates            | 2020     |\n",
      "|    policy_gradient_loss | 0.0839   |\n",
      "|    std                  | 0.122    |\n",
      "|    value_loss           | 1.02e+03 |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 16.4     |\n",
      "|    ep_rew_mean          | -822     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 239      |\n",
      "|    iterations           | 4        |\n",
      "|    time_elapsed         | 34       |\n",
      "|    total_timesteps      | 8192     |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 328.7948 |\n",
      "|    clip_fraction        | 0.952    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.686    |\n",
      "|    explained_variance   | 0.403    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 110      |\n",
      "|    n_updates            | 2030     |\n",
      "|    policy_gradient_loss | 0.267    |\n",
      "|    std                  | 0.122    |\n",
      "|    value_loss           | 2.48e+03 |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 16.5      |\n",
      "|    ep_rew_mean          | -833      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 233       |\n",
      "|    iterations           | 5         |\n",
      "|    time_elapsed         | 43        |\n",
      "|    total_timesteps      | 10240     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 105.69351 |\n",
      "|    clip_fraction        | 0.923     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.685     |\n",
      "|    explained_variance   | 0.679     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 74.8      |\n",
      "|    n_updates            | 2040      |\n",
      "|    policy_gradient_loss | 0.263     |\n",
      "|    std                  | 0.122     |\n",
      "|    value_loss           | 1.41e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.2      |\n",
      "|    ep_rew_mean          | -797      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 230       |\n",
      "|    iterations           | 6         |\n",
      "|    time_elapsed         | 53        |\n",
      "|    total_timesteps      | 12288     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 58.612576 |\n",
      "|    clip_fraction        | 0.911     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.682     |\n",
      "|    explained_variance   | 0.698     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 15.5      |\n",
      "|    n_updates            | 2050      |\n",
      "|    policy_gradient_loss | 0.287     |\n",
      "|    std                  | 0.122     |\n",
      "|    value_loss           | 994       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 18.4      |\n",
      "|    ep_rew_mean          | -847      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 227       |\n",
      "|    iterations           | 7         |\n",
      "|    time_elapsed         | 63        |\n",
      "|    total_timesteps      | 14336     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 898.13226 |\n",
      "|    clip_fraction        | 0.876     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.68      |\n",
      "|    explained_variance   | 0.399     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 356       |\n",
      "|    n_updates            | 2060      |\n",
      "|    policy_gradient_loss | 0.176     |\n",
      "|    std                  | 0.123     |\n",
      "|    value_loss           | 467       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.5      |\n",
      "|    ep_rew_mean          | -829      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 225       |\n",
      "|    iterations           | 8         |\n",
      "|    time_elapsed         | 72        |\n",
      "|    total_timesteps      | 16384     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 49.768673 |\n",
      "|    clip_fraction        | 0.835     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.68      |\n",
      "|    explained_variance   | 0.544     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 16.4      |\n",
      "|    n_updates            | 2070      |\n",
      "|    policy_gradient_loss | 0.0262    |\n",
      "|    std                  | 0.123     |\n",
      "|    value_loss           | 3.37e+03  |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.8      |\n",
      "|    ep_rew_mean          | -777      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 224       |\n",
      "|    iterations           | 9         |\n",
      "|    time_elapsed         | 82        |\n",
      "|    total_timesteps      | 18432     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 44.686012 |\n",
      "|    clip_fraction        | 0.698     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.681     |\n",
      "|    explained_variance   | 0.668     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 112       |\n",
      "|    n_updates            | 2080      |\n",
      "|    policy_gradient_loss | 0.0288    |\n",
      "|    std                  | 0.122     |\n",
      "|    value_loss           | 3.36e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 17.1      |\n",
      "|    ep_rew_mean          | -827      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 223       |\n",
      "|    iterations           | 10        |\n",
      "|    time_elapsed         | 91        |\n",
      "|    total_timesteps      | 20480     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 15.336521 |\n",
      "|    clip_fraction        | 0.822     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.683     |\n",
      "|    explained_variance   | 0.751     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 47.5      |\n",
      "|    n_updates            | 2090      |\n",
      "|    policy_gradient_loss | 0.113     |\n",
      "|    std                  | 0.122     |\n",
      "|    value_loss           | 1.96e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 15.3     |\n",
      "|    ep_rew_mean          | -807     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 227      |\n",
      "|    iterations           | 11       |\n",
      "|    time_elapsed         | 98       |\n",
      "|    total_timesteps      | 22528    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 8.998261 |\n",
      "|    clip_fraction        | 0.483    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.682    |\n",
      "|    explained_variance   | 0.72     |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 4.14e+03 |\n",
      "|    n_updates            | 2100     |\n",
      "|    policy_gradient_loss | 0.0819   |\n",
      "|    std                  | 0.122    |\n",
      "|    value_loss           | 3.57e+03 |\n",
      "--------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 17.9        |\n",
      "|    ep_rew_mean          | -813        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 232         |\n",
      "|    iterations           | 12          |\n",
      "|    time_elapsed         | 105         |\n",
      "|    total_timesteps      | 24576       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.029752996 |\n",
      "|    clip_fraction        | 0.301       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.681       |\n",
      "|    explained_variance   | 0.818       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 115         |\n",
      "|    n_updates            | 2110        |\n",
      "|    policy_gradient_loss | 0.0182      |\n",
      "|    std                  | 0.122       |\n",
      "|    value_loss           | 1.77e+03    |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 19.5      |\n",
      "|    ep_rew_mean          | -748      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 236       |\n",
      "|    iterations           | 13        |\n",
      "|    time_elapsed         | 112       |\n",
      "|    total_timesteps      | 26624     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 30.548716 |\n",
      "|    clip_fraction        | 0.87      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.68      |\n",
      "|    explained_variance   | 0.784     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 40.7      |\n",
      "|    n_updates            | 2120      |\n",
      "|    policy_gradient_loss | 0.0856    |\n",
      "|    std                  | 0.123     |\n",
      "|    value_loss           | 2.68e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 24.6      |\n",
      "|    ep_rew_mean          | -683      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 240       |\n",
      "|    iterations           | 14        |\n",
      "|    time_elapsed         | 119       |\n",
      "|    total_timesteps      | 28672     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 27.712055 |\n",
      "|    clip_fraction        | 0.912     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.681     |\n",
      "|    explained_variance   | 0.843     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 25.7      |\n",
      "|    n_updates            | 2130      |\n",
      "|    policy_gradient_loss | 0.0986    |\n",
      "|    std                  | 0.122     |\n",
      "|    value_loss           | 417       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 25.5      |\n",
      "|    ep_rew_mean          | -712      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 244       |\n",
      "|    iterations           | 15        |\n",
      "|    time_elapsed         | 125       |\n",
      "|    total_timesteps      | 30720     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 4.4529543 |\n",
      "|    clip_fraction        | 0.764     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.679     |\n",
      "|    explained_variance   | 0.929     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 104       |\n",
      "|    n_updates            | 2140      |\n",
      "|    policy_gradient_loss | 0.15      |\n",
      "|    std                  | 0.123     |\n",
      "|    value_loss           | 384       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 22.1      |\n",
      "|    ep_rew_mean          | -757      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 247       |\n",
      "|    iterations           | 16        |\n",
      "|    time_elapsed         | 132       |\n",
      "|    total_timesteps      | 32768     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 2.9129682 |\n",
      "|    clip_fraction        | 0.557     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.675     |\n",
      "|    explained_variance   | 0.861     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 76.9      |\n",
      "|    n_updates            | 2150      |\n",
      "|    policy_gradient_loss | 0.0494    |\n",
      "|    std                  | 0.123     |\n",
      "|    value_loss           | 2.8e+03   |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 26.5      |\n",
      "|    ep_rew_mean          | -658      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 249       |\n",
      "|    iterations           | 17        |\n",
      "|    time_elapsed         | 139       |\n",
      "|    total_timesteps      | 34816     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 14.338218 |\n",
      "|    clip_fraction        | 0.833     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.672     |\n",
      "|    explained_variance   | 0.763     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.26e+04  |\n",
      "|    n_updates            | 2160      |\n",
      "|    policy_gradient_loss | 0.0668    |\n",
      "|    std                  | 0.124     |\n",
      "|    value_loss           | 6.79e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 27.4      |\n",
      "|    ep_rew_mean          | -630      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 252       |\n",
      "|    iterations           | 18        |\n",
      "|    time_elapsed         | 145       |\n",
      "|    total_timesteps      | 36864     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3.9973974 |\n",
      "|    clip_fraction        | 0.403     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.671     |\n",
      "|    explained_variance   | 0.97      |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 18.8      |\n",
      "|    n_updates            | 2170      |\n",
      "|    policy_gradient_loss | 0.000334  |\n",
      "|    std                  | 0.123     |\n",
      "|    value_loss           | 413       |\n",
      "---------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 33        |\n",
      "|    ep_rew_mean          | -594      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 255       |\n",
      "|    iterations           | 19        |\n",
      "|    time_elapsed         | 152       |\n",
      "|    total_timesteps      | 38912     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 11.181444 |\n",
      "|    clip_fraction        | 0.644     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.671     |\n",
      "|    explained_variance   | 0.978     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 49.3      |\n",
      "|    n_updates            | 2180      |\n",
      "|    policy_gradient_loss | 0.097     |\n",
      "|    std                  | 0.124     |\n",
      "|    value_loss           | 185       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 42.2      |\n",
      "|    ep_rew_mean          | -513      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 257       |\n",
      "|    iterations           | 20        |\n",
      "|    time_elapsed         | 159       |\n",
      "|    total_timesteps      | 40960     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 12.961172 |\n",
      "|    clip_fraction        | 0.784     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.671     |\n",
      "|    explained_variance   | 0.947     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 39.6      |\n",
      "|    n_updates            | 2190      |\n",
      "|    policy_gradient_loss | 0.089     |\n",
      "|    std                  | 0.124     |\n",
      "|    value_loss           | 2.02e+03  |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 54.8     |\n",
      "|    ep_rew_mean          | -390     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 259      |\n",
      "|    iterations           | 21       |\n",
      "|    time_elapsed         | 165      |\n",
      "|    total_timesteps      | 43008    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 65.82243 |\n",
      "|    clip_fraction        | 0.756    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.674    |\n",
      "|    explained_variance   | 0.99     |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 25.3     |\n",
      "|    n_updates            | 2200     |\n",
      "|    policy_gradient_loss | 0.108    |\n",
      "|    std                  | 0.123    |\n",
      "|    value_loss           | 244      |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 71.2      |\n",
      "|    ep_rew_mean          | -238      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 261       |\n",
      "|    iterations           | 22        |\n",
      "|    time_elapsed         | 172       |\n",
      "|    total_timesteps      | 45056     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 21.761497 |\n",
      "|    clip_fraction        | 0.763     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.674     |\n",
      "|    explained_variance   | 0.979     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 73.9      |\n",
      "|    n_updates            | 2210      |\n",
      "|    policy_gradient_loss | 0.232     |\n",
      "|    std                  | 0.124     |\n",
      "|    value_loss           | 761       |\n",
      "---------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 83.4     |\n",
      "|    ep_rew_mean          | -129     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 262      |\n",
      "|    iterations           | 23       |\n",
      "|    time_elapsed         | 179      |\n",
      "|    total_timesteps      | 47104    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 69.48995 |\n",
      "|    clip_fraction        | 0.742    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.669    |\n",
      "|    explained_variance   | 0.932    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 1.21e+03 |\n",
      "|    n_updates            | 2220     |\n",
      "|    policy_gradient_loss | 0.19     |\n",
      "|    std                  | 0.124    |\n",
      "|    value_loss           | 2.66e+03 |\n",
      "--------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 86.2     |\n",
      "|    ep_rew_mean          | -108     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 264      |\n",
      "|    iterations           | 24       |\n",
      "|    time_elapsed         | 186      |\n",
      "|    total_timesteps      | 49152    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 32.86512 |\n",
      "|    clip_fraction        | 0.393    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.67     |\n",
      "|    explained_variance   | 0.911    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 151      |\n",
      "|    n_updates            | 2230     |\n",
      "|    policy_gradient_loss | 0.083    |\n",
      "|    std                  | 0.124    |\n",
      "|    value_loss           | 1.4e+03  |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 60.6      |\n",
      "|    ep_rew_mean          | -344      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 265       |\n",
      "|    iterations           | 25        |\n",
      "|    time_elapsed         | 192       |\n",
      "|    total_timesteps      | 51200     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 7.6798058 |\n",
      "|    clip_fraction        | 0.81      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.668     |\n",
      "|    explained_variance   | 0.969     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 34.5      |\n",
      "|    n_updates            | 2240      |\n",
      "|    policy_gradient_loss | 0.0901    |\n",
      "|    std                  | 0.124     |\n",
      "|    value_loss           | 284       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 54.8      |\n",
      "|    ep_rew_mean          | -395      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 267       |\n",
      "|    iterations           | 26        |\n",
      "|    time_elapsed         | 199       |\n",
      "|    total_timesteps      | 53248     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 1.8050169 |\n",
      "|    clip_fraction        | 0.36      |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.659     |\n",
      "|    explained_variance   | 0.983     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 42.4      |\n",
      "|    n_updates            | 2250      |\n",
      "|    policy_gradient_loss | 0.0696    |\n",
      "|    std                  | 0.126     |\n",
      "|    value_loss           | 225       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 56        |\n",
      "|    ep_rew_mean          | -405      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 268       |\n",
      "|    iterations           | 27        |\n",
      "|    time_elapsed         | 205       |\n",
      "|    total_timesteps      | 55296     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 7.2063627 |\n",
      "|    clip_fraction        | 0.249     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.652     |\n",
      "|    explained_variance   | 0.995     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 13.9      |\n",
      "|    n_updates            | 2260      |\n",
      "|    policy_gradient_loss | 0.019     |\n",
      "|    std                  | 0.125     |\n",
      "|    value_loss           | 79.4      |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 53.5       |\n",
      "|    ep_rew_mean          | -424       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 269        |\n",
      "|    iterations           | 28         |\n",
      "|    time_elapsed         | 212        |\n",
      "|    total_timesteps      | 57344      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.41706586 |\n",
      "|    clip_fraction        | 0.207      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.666      |\n",
      "|    explained_variance   | 0.958      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 36.3       |\n",
      "|    n_updates            | 2270       |\n",
      "|    policy_gradient_loss | 0.00101    |\n",
      "|    std                  | 0.124      |\n",
      "|    value_loss           | 5.34e+03   |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 53       |\n",
      "|    ep_rew_mean          | -441     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 270      |\n",
      "|    iterations           | 29       |\n",
      "|    time_elapsed         | 219      |\n",
      "|    total_timesteps      | 59392    |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 1.850192 |\n",
      "|    clip_fraction        | 0.371    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.674    |\n",
      "|    explained_variance   | 0.999    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 40.8     |\n",
      "|    n_updates            | 2280     |\n",
      "|    policy_gradient_loss | 0.0694   |\n",
      "|    std                  | 0.123    |\n",
      "|    value_loss           | 51.5     |\n",
      "--------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 51.5      |\n",
      "|    ep_rew_mean          | -462      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 271       |\n",
      "|    iterations           | 30        |\n",
      "|    time_elapsed         | 226       |\n",
      "|    total_timesteps      | 61440     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 6.1507826 |\n",
      "|    clip_fraction        | 0.305     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.685     |\n",
      "|    explained_variance   | 0.966     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 30        |\n",
      "|    n_updates            | 2290      |\n",
      "|    policy_gradient_loss | 0.00944   |\n",
      "|    std                  | 0.121     |\n",
      "|    value_loss           | 3.91e+03  |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 53.8      |\n",
      "|    ep_rew_mean          | -417      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 272       |\n",
      "|    iterations           | 31        |\n",
      "|    time_elapsed         | 233       |\n",
      "|    total_timesteps      | 63488     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 3.6424034 |\n",
      "|    clip_fraction        | 0.359     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.694     |\n",
      "|    explained_variance   | 0.987     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 20.7      |\n",
      "|    n_updates            | 2300      |\n",
      "|    policy_gradient_loss | 0.0385    |\n",
      "|    std                  | 0.12      |\n",
      "|    value_loss           | 261       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 58.3      |\n",
      "|    ep_rew_mean          | -371      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 271       |\n",
      "|    iterations           | 32        |\n",
      "|    time_elapsed         | 241       |\n",
      "|    total_timesteps      | 65536     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.5473411 |\n",
      "|    clip_fraction        | 0.318     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.702     |\n",
      "|    explained_variance   | 0.999     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 161       |\n",
      "|    n_updates            | 2310      |\n",
      "|    policy_gradient_loss | 0.021     |\n",
      "|    std                  | 0.12      |\n",
      "|    value_loss           | 60.6      |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 65.4       |\n",
      "|    ep_rew_mean          | -285       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 268        |\n",
      "|    iterations           | 33         |\n",
      "|    time_elapsed         | 251        |\n",
      "|    total_timesteps      | 67584      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.49661443 |\n",
      "|    clip_fraction        | 0.447      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.703      |\n",
      "|    explained_variance   | 0.999      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 24.8       |\n",
      "|    n_updates            | 2320       |\n",
      "|    policy_gradient_loss | 0.101      |\n",
      "|    std                  | 0.12       |\n",
      "|    value_loss           | 57.1       |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 67.8       |\n",
      "|    ep_rew_mean          | -274       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 266        |\n",
      "|    iterations           | 34         |\n",
      "|    time_elapsed         | 261        |\n",
      "|    total_timesteps      | 69632      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.18223259 |\n",
      "|    clip_fraction        | 0.181      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.711      |\n",
      "|    explained_variance   | 0.947      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 70.9       |\n",
      "|    n_updates            | 2330       |\n",
      "|    policy_gradient_loss | 0.00223    |\n",
      "|    std                  | 0.118      |\n",
      "|    value_loss           | 868        |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 69.7      |\n",
      "|    ep_rew_mean          | -282      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 264       |\n",
      "|    iterations           | 35        |\n",
      "|    time_elapsed         | 270       |\n",
      "|    total_timesteps      | 71680     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 14.600253 |\n",
      "|    clip_fraction        | 0.287     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.724     |\n",
      "|    explained_variance   | 0.979     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 32        |\n",
      "|    n_updates            | 2340      |\n",
      "|    policy_gradient_loss | 0.0205    |\n",
      "|    std                  | 0.117     |\n",
      "|    value_loss           | 3.12e+03  |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 62         |\n",
      "|    ep_rew_mean          | -350       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 262        |\n",
      "|    iterations           | 36         |\n",
      "|    time_elapsed         | 280        |\n",
      "|    total_timesteps      | 73728      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.37186545 |\n",
      "|    clip_fraction        | 0.192      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.731      |\n",
      "|    explained_variance   | 0.884      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 95.1       |\n",
      "|    n_updates            | 2350       |\n",
      "|    policy_gradient_loss | 0.0112     |\n",
      "|    std                  | 0.116      |\n",
      "|    value_loss           | 1.02e+04   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 66.8       |\n",
      "|    ep_rew_mean          | -320       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 261        |\n",
      "|    iterations           | 37         |\n",
      "|    time_elapsed         | 289        |\n",
      "|    total_timesteps      | 75776      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.28197697 |\n",
      "|    clip_fraction        | 0.243      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.734      |\n",
      "|    explained_variance   | 0.998      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 57.2       |\n",
      "|    n_updates            | 2360       |\n",
      "|    policy_gradient_loss | -0.00536   |\n",
      "|    std                  | 0.116      |\n",
      "|    value_loss           | 104        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 67.4        |\n",
      "|    ep_rew_mean          | -275        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 259         |\n",
      "|    iterations           | 38          |\n",
      "|    time_elapsed         | 299         |\n",
      "|    total_timesteps      | 77824       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.024853952 |\n",
      "|    clip_fraction        | 0.103       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.738       |\n",
      "|    explained_variance   | 0.854       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.95e+04    |\n",
      "|    n_updates            | 2370        |\n",
      "|    policy_gradient_loss | -0.00762    |\n",
      "|    std                  | 0.116       |\n",
      "|    value_loss           | 2.12e+04    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 70.9       |\n",
      "|    ep_rew_mean          | -259       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 258        |\n",
      "|    iterations           | 39         |\n",
      "|    time_elapsed         | 309        |\n",
      "|    total_timesteps      | 79872      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.84760743 |\n",
      "|    clip_fraction        | 0.295      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.741      |\n",
      "|    explained_variance   | 0.995      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 61.6       |\n",
      "|    n_updates            | 2380       |\n",
      "|    policy_gradient_loss | -0.00135   |\n",
      "|    std                  | 0.115      |\n",
      "|    value_loss           | 142        |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 71.4       |\n",
      "|    ep_rew_mean          | -260       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 257        |\n",
      "|    iterations           | 40         |\n",
      "|    time_elapsed         | 318        |\n",
      "|    total_timesteps      | 81920      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.22379285 |\n",
      "|    clip_fraction        | 0.187      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.746      |\n",
      "|    explained_variance   | 0.996      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 34.8       |\n",
      "|    n_updates            | 2390       |\n",
      "|    policy_gradient_loss | -0.00274   |\n",
      "|    std                  | 0.115      |\n",
      "|    value_loss           | 271        |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 73.7       |\n",
      "|    ep_rew_mean          | -240       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 255        |\n",
      "|    iterations           | 41         |\n",
      "|    time_elapsed         | 328        |\n",
      "|    total_timesteps      | 83968      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.13147558 |\n",
      "|    clip_fraction        | 0.223      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.746      |\n",
      "|    explained_variance   | 0.999      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 34.5       |\n",
      "|    n_updates            | 2400       |\n",
      "|    policy_gradient_loss | -0.0125    |\n",
      "|    std                  | 0.114      |\n",
      "|    value_loss           | 124        |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 77.2       |\n",
      "|    ep_rew_mean          | -233       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 254        |\n",
      "|    iterations           | 42         |\n",
      "|    time_elapsed         | 337        |\n",
      "|    total_timesteps      | 86016      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.21243775 |\n",
      "|    clip_fraction        | 0.218      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.751      |\n",
      "|    explained_variance   | 0.998      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 68.8       |\n",
      "|    n_updates            | 2410       |\n",
      "|    policy_gradient_loss | -0.00815   |\n",
      "|    std                  | 0.114      |\n",
      "|    value_loss           | 109        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 80.5        |\n",
      "|    ep_rew_mean          | -219        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 253         |\n",
      "|    iterations           | 43          |\n",
      "|    time_elapsed         | 347         |\n",
      "|    total_timesteps      | 88064       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.017172681 |\n",
      "|    clip_fraction        | 0.169       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.754       |\n",
      "|    explained_variance   | 0.912       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.47e+03    |\n",
      "|    n_updates            | 2420        |\n",
      "|    policy_gradient_loss | -0.00438    |\n",
      "|    std                  | 0.114       |\n",
      "|    value_loss           | 1.12e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 86          |\n",
      "|    ep_rew_mean          | -173        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 252         |\n",
      "|    iterations           | 44          |\n",
      "|    time_elapsed         | 356         |\n",
      "|    total_timesteps      | 90112       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.117933065 |\n",
      "|    clip_fraction        | 0.216       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.758       |\n",
      "|    explained_variance   | 0.971       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 47.3        |\n",
      "|    n_updates            | 2430        |\n",
      "|    policy_gradient_loss | 0.00963     |\n",
      "|    std                  | 0.113       |\n",
      "|    value_loss           | 4.24e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 87         |\n",
      "|    ep_rew_mean          | -162       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 251        |\n",
      "|    iterations           | 45         |\n",
      "|    time_elapsed         | 366        |\n",
      "|    total_timesteps      | 92160      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.09837577 |\n",
      "|    clip_fraction        | 0.248      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.765      |\n",
      "|    explained_variance   | 0.998      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 52.2       |\n",
      "|    n_updates            | 2440       |\n",
      "|    policy_gradient_loss | 0.00545    |\n",
      "|    std                  | 0.112      |\n",
      "|    value_loss           | 231        |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 78.5      |\n",
      "|    ep_rew_mean          | -240      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 250       |\n",
      "|    iterations           | 46        |\n",
      "|    time_elapsed         | 376       |\n",
      "|    total_timesteps      | 94208     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 4.7543993 |\n",
      "|    clip_fraction        | 0.412     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.77      |\n",
      "|    explained_variance   | 0.998     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 33.6      |\n",
      "|    n_updates            | 2450      |\n",
      "|    policy_gradient_loss | 0.094     |\n",
      "|    std                  | 0.112     |\n",
      "|    value_loss           | 202       |\n",
      "---------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 77.4      |\n",
      "|    ep_rew_mean          | -240      |\n",
      "| time/                   |           |\n",
      "|    fps                  | 249       |\n",
      "|    iterations           | 47        |\n",
      "|    time_elapsed         | 385       |\n",
      "|    total_timesteps      | 96256     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.0655767 |\n",
      "|    clip_fraction        | 0.183     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.773     |\n",
      "|    explained_variance   | 0.968     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 3.75e+03  |\n",
      "|    n_updates            | 2460      |\n",
      "|    policy_gradient_loss | 0.00817   |\n",
      "|    std                  | 0.112     |\n",
      "|    value_loss           | 4.42e+03  |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 56.3        |\n",
      "|    ep_rew_mean          | -437        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 248         |\n",
      "|    iterations           | 48          |\n",
      "|    time_elapsed         | 395         |\n",
      "|    total_timesteps      | 98304       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.044501852 |\n",
      "|    clip_fraction        | 0.136       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.772       |\n",
      "|    explained_variance   | 0.971       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 641         |\n",
      "|    n_updates            | 2470        |\n",
      "|    policy_gradient_loss | -0.00144    |\n",
      "|    std                  | 0.112       |\n",
      "|    value_loss           | 2.89e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 62.5     |\n",
      "|    ep_rew_mean          | -353     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 247      |\n",
      "|    iterations           | 49       |\n",
      "|    time_elapsed         | 404      |\n",
      "|    total_timesteps      | 100352   |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 8.274174 |\n",
      "|    clip_fraction        | 0.339    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | 0.772    |\n",
      "|    explained_variance   | 0.995    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 17.9     |\n",
      "|    n_updates            | 2480     |\n",
      "|    policy_gradient_loss | 0.054    |\n",
      "|    std                  | 0.112    |\n",
      "|    value_loss           | 120      |\n",
      "--------------------------------------\n",
      "--- 408.68086552619934 seconds ---\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 1.0\n",
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 172      |\n",
      "|    ep_rew_mean     | 484      |\n",
      "| time/              |          |\n",
      "|    fps             | 348      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 5        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 202         |\n",
      "|    ep_rew_mean          | 930         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 259         |\n",
      "|    iterations           | 2           |\n",
      "|    time_elapsed         | 15          |\n",
      "|    total_timesteps      | 4096        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.039411712 |\n",
      "|    clip_fraction        | 0.156       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.1         |\n",
      "|    explained_variance   | 0.998       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 40.7        |\n",
      "|    n_updates            | 2010        |\n",
      "|    policy_gradient_loss | 0.0113      |\n",
      "|    std                  | 0.0804      |\n",
      "|    value_loss           | 311         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 210        |\n",
      "|    ep_rew_mean          | 978        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 243        |\n",
      "|    iterations           | 3          |\n",
      "|    time_elapsed         | 25         |\n",
      "|    total_timesteps      | 6144       |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.13389672 |\n",
      "|    clip_fraction        | 0.0401     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.1        |\n",
      "|    explained_variance   | 0.989      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.14e+03   |\n",
      "|    n_updates            | 2020       |\n",
      "|    policy_gradient_loss | 0.00618    |\n",
      "|    std                  | 0.0801     |\n",
      "|    value_loss           | 1.99e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 229         |\n",
      "|    ep_rew_mean          | 1.24e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 235         |\n",
      "|    iterations           | 4           |\n",
      "|    time_elapsed         | 34          |\n",
      "|    total_timesteps      | 8192        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004314984 |\n",
      "|    clip_fraction        | 0.0203      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.11        |\n",
      "|    explained_variance   | 0.822       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 8.33e+03    |\n",
      "|    n_updates            | 2030        |\n",
      "|    policy_gradient_loss | -0.00484    |\n",
      "|    std                  | 0.0801      |\n",
      "|    value_loss           | 2.44e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 241         |\n",
      "|    ep_rew_mean          | 1.39e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 231         |\n",
      "|    iterations           | 5           |\n",
      "|    time_elapsed         | 44          |\n",
      "|    total_timesteps      | 10240       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006155554 |\n",
      "|    clip_fraction        | 0.0458      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.11        |\n",
      "|    explained_variance   | 0.833       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 137         |\n",
      "|    n_updates            | 2040        |\n",
      "|    policy_gradient_loss | -0.00381    |\n",
      "|    std                  | 0.08        |\n",
      "|    value_loss           | 3.64e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 231         |\n",
      "|    ep_rew_mean          | 1.33e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 227         |\n",
      "|    iterations           | 6           |\n",
      "|    time_elapsed         | 53          |\n",
      "|    total_timesteps      | 12288       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.099704206 |\n",
      "|    clip_fraction        | 0.0691      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.11        |\n",
      "|    explained_variance   | 0.944       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 25.2        |\n",
      "|    n_updates            | 2050        |\n",
      "|    policy_gradient_loss | 0.0167      |\n",
      "|    std                  | 0.0798      |\n",
      "|    value_loss           | 567         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 235         |\n",
      "|    ep_rew_mean          | 1.42e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 225         |\n",
      "|    iterations           | 7           |\n",
      "|    time_elapsed         | 63          |\n",
      "|    total_timesteps      | 14336       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.013024409 |\n",
      "|    clip_fraction        | 0.0476      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.11        |\n",
      "|    explained_variance   | 0.947       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.64e+03    |\n",
      "|    n_updates            | 2060        |\n",
      "|    policy_gradient_loss | 0.0153      |\n",
      "|    std                  | 0.0797      |\n",
      "|    value_loss           | 3.9e+03     |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 235        |\n",
      "|    ep_rew_mean          | 1.45e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 223        |\n",
      "|    iterations           | 8          |\n",
      "|    time_elapsed         | 73         |\n",
      "|    total_timesteps      | 16384      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.04070182 |\n",
      "|    clip_fraction        | 0.0832     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.11       |\n",
      "|    explained_variance   | 0.888      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 131        |\n",
      "|    n_updates            | 2070       |\n",
      "|    policy_gradient_loss | -0.00179   |\n",
      "|    std                  | 0.0795     |\n",
      "|    value_loss           | 1.12e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 224         |\n",
      "|    ep_rew_mean          | 1.34e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 221         |\n",
      "|    iterations           | 9           |\n",
      "|    time_elapsed         | 83          |\n",
      "|    total_timesteps      | 18432       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.048106164 |\n",
      "|    clip_fraction        | 0.0356      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.11        |\n",
      "|    explained_variance   | 0.965       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 381         |\n",
      "|    n_updates            | 2080        |\n",
      "|    policy_gradient_loss | -0.000223   |\n",
      "|    std                  | 0.0794      |\n",
      "|    value_loss           | 592         |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 232          |\n",
      "|    ep_rew_mean          | 1.45e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 220          |\n",
      "|    iterations           | 10           |\n",
      "|    time_elapsed         | 92           |\n",
      "|    total_timesteps      | 20480        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0045435457 |\n",
      "|    clip_fraction        | 0.0192       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.12         |\n",
      "|    explained_variance   | 0.986        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 402          |\n",
      "|    n_updates            | 2090         |\n",
      "|    policy_gradient_loss | -0.000893    |\n",
      "|    std                  | 0.0791       |\n",
      "|    value_loss           | 954          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 226         |\n",
      "|    ep_rew_mean          | 1.36e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 220         |\n",
      "|    iterations           | 11          |\n",
      "|    time_elapsed         | 102         |\n",
      "|    total_timesteps      | 22528       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.025307747 |\n",
      "|    clip_fraction        | 0.0984      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.12        |\n",
      "|    explained_variance   | 0.92        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 87.7        |\n",
      "|    n_updates            | 2100        |\n",
      "|    policy_gradient_loss | 0.00376     |\n",
      "|    std                  | 0.0786      |\n",
      "|    value_loss           | 247         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 222         |\n",
      "|    ep_rew_mean          | 1.33e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 219         |\n",
      "|    iterations           | 12          |\n",
      "|    time_elapsed         | 111         |\n",
      "|    total_timesteps      | 24576       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.015009484 |\n",
      "|    clip_fraction        | 0.0487      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.13        |\n",
      "|    explained_variance   | 0.976       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 113         |\n",
      "|    n_updates            | 2110        |\n",
      "|    policy_gradient_loss | 0.00623     |\n",
      "|    std                  | 0.0783      |\n",
      "|    value_loss           | 386         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 219         |\n",
      "|    ep_rew_mean          | 1.29e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 219         |\n",
      "|    iterations           | 13          |\n",
      "|    time_elapsed         | 121         |\n",
      "|    total_timesteps      | 26624       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.028493335 |\n",
      "|    clip_fraction        | 0.042       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.13        |\n",
      "|    explained_variance   | 0.925       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.23e+03    |\n",
      "|    n_updates            | 2120        |\n",
      "|    policy_gradient_loss | 0.000167    |\n",
      "|    std                  | 0.0782      |\n",
      "|    value_loss           | 7.89e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 214         |\n",
      "|    ep_rew_mean          | 1.27e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 219         |\n",
      "|    iterations           | 14          |\n",
      "|    time_elapsed         | 130         |\n",
      "|    total_timesteps      | 28672       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008682229 |\n",
      "|    clip_fraction        | 0.0453      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.13        |\n",
      "|    explained_variance   | 0.902       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 7.56e+03    |\n",
      "|    n_updates            | 2130        |\n",
      "|    policy_gradient_loss | -0.00227    |\n",
      "|    std                  | 0.0782      |\n",
      "|    value_loss           | 1.15e+04    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 205        |\n",
      "|    ep_rew_mean          | 1.16e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 218        |\n",
      "|    iterations           | 15         |\n",
      "|    time_elapsed         | 140        |\n",
      "|    total_timesteps      | 30720      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.34519023 |\n",
      "|    clip_fraction        | 0.21       |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.13       |\n",
      "|    explained_variance   | 0.956      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 213        |\n",
      "|    n_updates            | 2140       |\n",
      "|    policy_gradient_loss | 0.0301     |\n",
      "|    std                  | 0.0781     |\n",
      "|    value_loss           | 1.96e+03   |\n",
      "----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 193       |\n",
      "|    ep_rew_mean          | 1e+03     |\n",
      "| time/                   |           |\n",
      "|    fps                  | 217       |\n",
      "|    iterations           | 16        |\n",
      "|    time_elapsed         | 150       |\n",
      "|    total_timesteps      | 32768     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.2491391 |\n",
      "|    clip_fraction        | 0.176     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.13      |\n",
      "|    explained_variance   | 0.984     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 185       |\n",
      "|    n_updates            | 2150      |\n",
      "|    policy_gradient_loss | 0.0061    |\n",
      "|    std                  | 0.078     |\n",
      "|    value_loss           | 453       |\n",
      "---------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 181         |\n",
      "|    ep_rew_mean          | 797         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 217         |\n",
      "|    iterations           | 17          |\n",
      "|    time_elapsed         | 160         |\n",
      "|    total_timesteps      | 34816       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.020200621 |\n",
      "|    clip_fraction        | 0.0527      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.13        |\n",
      "|    explained_variance   | 0.981       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 817         |\n",
      "|    n_updates            | 2160        |\n",
      "|    policy_gradient_loss | 0.000801    |\n",
      "|    std                  | 0.078       |\n",
      "|    value_loss           | 1.3e+03     |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 154        |\n",
      "|    ep_rew_mean          | 427        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 217        |\n",
      "|    iterations           | 18         |\n",
      "|    time_elapsed         | 169        |\n",
      "|    total_timesteps      | 36864      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.03477214 |\n",
      "|    clip_fraction        | 0.0917     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.13       |\n",
      "|    explained_variance   | 0.999      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 184        |\n",
      "|    n_updates            | 2170       |\n",
      "|    policy_gradient_loss | 0.0138     |\n",
      "|    std                  | 0.0781     |\n",
      "|    value_loss           | 419        |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 154          |\n",
      "|    ep_rew_mean          | 467          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 216          |\n",
      "|    iterations           | 19           |\n",
      "|    time_elapsed         | 179          |\n",
      "|    total_timesteps      | 38912        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0035881982 |\n",
      "|    clip_fraction        | 0.0445       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.13         |\n",
      "|    explained_variance   | 0.989        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 918          |\n",
      "|    n_updates            | 2180         |\n",
      "|    policy_gradient_loss | 0.00288      |\n",
      "|    std                  | 0.078        |\n",
      "|    value_loss           | 5.76e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 158         |\n",
      "|    ep_rew_mean          | 537         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 216         |\n",
      "|    iterations           | 20          |\n",
      "|    time_elapsed         | 188         |\n",
      "|    total_timesteps      | 40960       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.081892505 |\n",
      "|    clip_fraction        | 0.084       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.13        |\n",
      "|    explained_variance   | 0.997       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 223         |\n",
      "|    n_updates            | 2190        |\n",
      "|    policy_gradient_loss | 0.00279     |\n",
      "|    std                  | 0.078       |\n",
      "|    value_loss           | 438         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 173        |\n",
      "|    ep_rew_mean          | 724        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 216        |\n",
      "|    iterations           | 21         |\n",
      "|    time_elapsed         | 199        |\n",
      "|    total_timesteps      | 43008      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.62905705 |\n",
      "|    clip_fraction        | 0.162      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.14       |\n",
      "|    explained_variance   | 0.998      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 21.3       |\n",
      "|    n_updates            | 2200       |\n",
      "|    policy_gradient_loss | 0.0143     |\n",
      "|    std                  | 0.0774     |\n",
      "|    value_loss           | 135        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 177         |\n",
      "|    ep_rew_mean          | 787         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 215         |\n",
      "|    iterations           | 22          |\n",
      "|    time_elapsed         | 208         |\n",
      "|    total_timesteps      | 45056       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010528815 |\n",
      "|    clip_fraction        | 0.128       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.14        |\n",
      "|    explained_variance   | -1.63       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 237         |\n",
      "|    n_updates            | 2210        |\n",
      "|    policy_gradient_loss | 0.0139      |\n",
      "|    std                  | 0.0769      |\n",
      "|    value_loss           | 405         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 177         |\n",
      "|    ep_rew_mean          | 790         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 218         |\n",
      "|    iterations           | 23          |\n",
      "|    time_elapsed         | 215         |\n",
      "|    total_timesteps      | 47104       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004278884 |\n",
      "|    clip_fraction        | 0.0317      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.15        |\n",
      "|    explained_variance   | 0.871       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.9e+03     |\n",
      "|    n_updates            | 2220        |\n",
      "|    policy_gradient_loss | -8.53e-05   |\n",
      "|    std                  | 0.0769      |\n",
      "|    value_loss           | 1.31e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 177         |\n",
      "|    ep_rew_mean          | 787         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 220         |\n",
      "|    iterations           | 24          |\n",
      "|    time_elapsed         | 222         |\n",
      "|    total_timesteps      | 49152       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009722183 |\n",
      "|    clip_fraction        | 0.053       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.15        |\n",
      "|    explained_variance   | 0.921       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 79.9        |\n",
      "|    n_updates            | 2230        |\n",
      "|    policy_gradient_loss | 0.00499     |\n",
      "|    std                  | 0.0768      |\n",
      "|    value_loss           | 7.62e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 186        |\n",
      "|    ep_rew_mean          | 878        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 223        |\n",
      "|    iterations           | 25         |\n",
      "|    time_elapsed         | 229        |\n",
      "|    total_timesteps      | 51200      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.23818478 |\n",
      "|    clip_fraction        | 0.0737     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.15       |\n",
      "|    explained_variance   | 0.96       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 108        |\n",
      "|    n_updates            | 2240       |\n",
      "|    policy_gradient_loss | 0.0149     |\n",
      "|    std                  | 0.0766     |\n",
      "|    value_loss           | 913        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 194         |\n",
      "|    ep_rew_mean          | 969         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 225         |\n",
      "|    iterations           | 26          |\n",
      "|    time_elapsed         | 236         |\n",
      "|    total_timesteps      | 53248       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008826816 |\n",
      "|    clip_fraction        | 0.0278      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.15        |\n",
      "|    explained_variance   | 0.968       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.24e+03    |\n",
      "|    n_updates            | 2250        |\n",
      "|    policy_gradient_loss | -0.00173    |\n",
      "|    std                  | 0.0765      |\n",
      "|    value_loss           | 4.93e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 198         |\n",
      "|    ep_rew_mean          | 1.05e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 227         |\n",
      "|    iterations           | 27          |\n",
      "|    time_elapsed         | 243         |\n",
      "|    total_timesteps      | 55296       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007957626 |\n",
      "|    clip_fraction        | 0.0593      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.15        |\n",
      "|    explained_variance   | 0.992       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 274         |\n",
      "|    n_updates            | 2260        |\n",
      "|    policy_gradient_loss | 0.0141      |\n",
      "|    std                  | 0.0764      |\n",
      "|    value_loss           | 696         |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 205         |\n",
      "|    ep_rew_mean          | 1.16e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 229         |\n",
      "|    iterations           | 28          |\n",
      "|    time_elapsed         | 249         |\n",
      "|    total_timesteps      | 57344       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.017859261 |\n",
      "|    clip_fraction        | 0.0951      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.15        |\n",
      "|    explained_variance   | 0.96        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 166         |\n",
      "|    n_updates            | 2270        |\n",
      "|    policy_gradient_loss | -0.00416    |\n",
      "|    std                  | 0.076       |\n",
      "|    value_loss           | 1.48e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 225        |\n",
      "|    ep_rew_mean          | 1.41e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 231        |\n",
      "|    iterations           | 29         |\n",
      "|    time_elapsed         | 256        |\n",
      "|    total_timesteps      | 59392      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.24883033 |\n",
      "|    clip_fraction        | 0.424      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.16       |\n",
      "|    explained_variance   | 0.892      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 6.55e+03   |\n",
      "|    n_updates            | 2280       |\n",
      "|    policy_gradient_loss | 0.0194     |\n",
      "|    std                  | 0.0758     |\n",
      "|    value_loss           | 1.11e+04   |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 236        |\n",
      "|    ep_rew_mean          | 1.54e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 232        |\n",
      "|    iterations           | 30         |\n",
      "|    time_elapsed         | 263        |\n",
      "|    total_timesteps      | 61440      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.19049394 |\n",
      "|    clip_fraction        | 0.412      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.16       |\n",
      "|    explained_variance   | 0.93       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 328        |\n",
      "|    n_updates            | 2290       |\n",
      "|    policy_gradient_loss | 0.0806     |\n",
      "|    std                  | 0.0759     |\n",
      "|    value_loss           | 666        |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 220        |\n",
      "|    ep_rew_mean          | 1.3e+03    |\n",
      "| time/                   |            |\n",
      "|    fps                  | 234        |\n",
      "|    iterations           | 31         |\n",
      "|    time_elapsed         | 270        |\n",
      "|    total_timesteps      | 63488      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.03181083 |\n",
      "|    clip_fraction        | 0.148      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.16       |\n",
      "|    explained_variance   | 0.856      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 1.23e+04   |\n",
      "|    n_updates            | 2300       |\n",
      "|    policy_gradient_loss | 0.00762    |\n",
      "|    std                  | 0.0758     |\n",
      "|    value_loss           | 5.52e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 212         |\n",
      "|    ep_rew_mean          | 1.19e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 236         |\n",
      "|    iterations           | 32          |\n",
      "|    time_elapsed         | 277         |\n",
      "|    total_timesteps      | 65536       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.094618514 |\n",
      "|    clip_fraction        | 0.115       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.16        |\n",
      "|    explained_variance   | 0.974       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 54.6        |\n",
      "|    n_updates            | 2310        |\n",
      "|    policy_gradient_loss | -0.00396    |\n",
      "|    std                  | 0.0758      |\n",
      "|    value_loss           | 2.09e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 216         |\n",
      "|    ep_rew_mean          | 1.23e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 237         |\n",
      "|    iterations           | 33          |\n",
      "|    time_elapsed         | 284         |\n",
      "|    total_timesteps      | 67584       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010369327 |\n",
      "|    clip_fraction        | 0.0801      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.16        |\n",
      "|    explained_variance   | 0.983       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.79e+03    |\n",
      "|    n_updates            | 2320        |\n",
      "|    policy_gradient_loss | 0.00572     |\n",
      "|    std                  | 0.0758      |\n",
      "|    value_loss           | 3.41e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 224        |\n",
      "|    ep_rew_mean          | 1.33e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 239        |\n",
      "|    iterations           | 34         |\n",
      "|    time_elapsed         | 290        |\n",
      "|    total_timesteps      | 69632      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02823722 |\n",
      "|    clip_fraction        | 0.161      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.17       |\n",
      "|    explained_variance   | 0.608      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 9          |\n",
      "|    n_updates            | 2330       |\n",
      "|    policy_gradient_loss | 0.0203     |\n",
      "|    std                  | 0.0743     |\n",
      "|    value_loss           | 215        |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 228        |\n",
      "|    ep_rew_mean          | 1.38e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 240        |\n",
      "|    iterations           | 35         |\n",
      "|    time_elapsed         | 297        |\n",
      "|    total_timesteps      | 71680      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.39039555 |\n",
      "|    clip_fraction        | 0.235      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.2        |\n",
      "|    explained_variance   | 0.289      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 5.36       |\n",
      "|    n_updates            | 2340       |\n",
      "|    policy_gradient_loss | 0.0433     |\n",
      "|    std                  | 0.0722     |\n",
      "|    value_loss           | 25.9       |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 239         |\n",
      "|    ep_rew_mean          | 1.53e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 242         |\n",
      "|    iterations           | 36          |\n",
      "|    time_elapsed         | 304         |\n",
      "|    total_timesteps      | 73728       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.012529919 |\n",
      "|    clip_fraction        | 0.0874      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.21        |\n",
      "|    explained_variance   | 0.553       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 43.9        |\n",
      "|    n_updates            | 2350        |\n",
      "|    policy_gradient_loss | 0.00696     |\n",
      "|    std                  | 0.0718      |\n",
      "|    value_loss           | 2.66e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 247         |\n",
      "|    ep_rew_mean          | 1.64e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 243         |\n",
      "|    iterations           | 37          |\n",
      "|    time_elapsed         | 311         |\n",
      "|    total_timesteps      | 75776       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008550012 |\n",
      "|    clip_fraction        | 0.0549      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.22        |\n",
      "|    explained_variance   | 0.655       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.89        |\n",
      "|    n_updates            | 2360        |\n",
      "|    policy_gradient_loss | 0.0125      |\n",
      "|    std                  | 0.0714      |\n",
      "|    value_loss           | 576         |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 237       |\n",
      "|    ep_rew_mean          | 1.44e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 244       |\n",
      "|    iterations           | 38        |\n",
      "|    time_elapsed         | 318       |\n",
      "|    total_timesteps      | 77824     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.8660531 |\n",
      "|    clip_fraction        | 0.161     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 1.24      |\n",
      "|    explained_variance   | -6.67     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 1.69      |\n",
      "|    n_updates            | 2370      |\n",
      "|    policy_gradient_loss | 0.036     |\n",
      "|    std                  | 0.0689    |\n",
      "|    value_loss           | 60.3      |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 199        |\n",
      "|    ep_rew_mean          | 748        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 245        |\n",
      "|    iterations           | 39         |\n",
      "|    time_elapsed         | 325        |\n",
      "|    total_timesteps      | 79872      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.42356086 |\n",
      "|    clip_fraction        | 0.284      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.26       |\n",
      "|    explained_variance   | 0.393      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 7.91e+04   |\n",
      "|    n_updates            | 2380       |\n",
      "|    policy_gradient_loss | 0.0482     |\n",
      "|    std                  | 0.0688     |\n",
      "|    value_loss           | 8.67e+04   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 166          |\n",
      "|    ep_rew_mean          | 146          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 246          |\n",
      "|    iterations           | 40           |\n",
      "|    time_elapsed         | 331          |\n",
      "|    total_timesteps      | 81920        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0054756105 |\n",
      "|    clip_fraction        | 0.0437       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.26         |\n",
      "|    explained_variance   | 0.796        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7.7e+04      |\n",
      "|    n_updates            | 2390         |\n",
      "|    policy_gradient_loss | -0.00345     |\n",
      "|    std                  | 0.0688       |\n",
      "|    value_loss           | 1.4e+05      |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 164        |\n",
      "|    ep_rew_mean          | 70.7       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 247        |\n",
      "|    iterations           | 41         |\n",
      "|    time_elapsed         | 338        |\n",
      "|    total_timesteps      | 83968      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00809764 |\n",
      "|    clip_fraction        | 0.077      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.26       |\n",
      "|    explained_variance   | 0.869      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 5.92e+04   |\n",
      "|    n_updates            | 2400       |\n",
      "|    policy_gradient_loss | -0.00542   |\n",
      "|    std                  | 0.0688     |\n",
      "|    value_loss           | 1.29e+05   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 141         |\n",
      "|    ep_rew_mean          | -315        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 248         |\n",
      "|    iterations           | 42          |\n",
      "|    time_elapsed         | 345         |\n",
      "|    total_timesteps      | 86016       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005289763 |\n",
      "|    clip_fraction        | 0.0404      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.26        |\n",
      "|    explained_variance   | 0.897       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.01e+03    |\n",
      "|    n_updates            | 2410        |\n",
      "|    policy_gradient_loss | -0.00541    |\n",
      "|    std                  | 0.0688      |\n",
      "|    value_loss           | 2.73e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 125          |\n",
      "|    ep_rew_mean          | -605         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 249          |\n",
      "|    iterations           | 43           |\n",
      "|    time_elapsed         | 352          |\n",
      "|    total_timesteps      | 88064        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015714664 |\n",
      "|    clip_fraction        | 0.0185       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.26         |\n",
      "|    explained_variance   | 0.632        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.34e+05     |\n",
      "|    n_updates            | 2420         |\n",
      "|    policy_gradient_loss | -0.00235     |\n",
      "|    std                  | 0.0688       |\n",
      "|    value_loss           | 1.68e+05     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 122          |\n",
      "|    ep_rew_mean          | -721         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 250          |\n",
      "|    iterations           | 44           |\n",
      "|    time_elapsed         | 359          |\n",
      "|    total_timesteps      | 90112        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0017869517 |\n",
      "|    clip_fraction        | 0.0168       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.26         |\n",
      "|    explained_variance   | 0.498        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7.59e+04     |\n",
      "|    n_updates            | 2430         |\n",
      "|    policy_gradient_loss | -0.00375     |\n",
      "|    std                  | 0.0687       |\n",
      "|    value_loss           | 1.49e+05     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 130          |\n",
      "|    ep_rew_mean          | -632         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 251          |\n",
      "|    iterations           | 45           |\n",
      "|    time_elapsed         | 366          |\n",
      "|    total_timesteps      | 92160        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0024866855 |\n",
      "|    clip_fraction        | 0.0258       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.26         |\n",
      "|    explained_variance   | 0.573        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.49e+05     |\n",
      "|    n_updates            | 2440         |\n",
      "|    policy_gradient_loss | -0.00609     |\n",
      "|    std                  | 0.0687       |\n",
      "|    value_loss           | 2.46e+05     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 146          |\n",
      "|    ep_rew_mean          | -395         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 252          |\n",
      "|    iterations           | 46           |\n",
      "|    time_elapsed         | 372          |\n",
      "|    total_timesteps      | 94208        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0015641276 |\n",
      "|    clip_fraction        | 0.0206       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 1.26         |\n",
      "|    explained_variance   | 0.464        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 6.27e+04     |\n",
      "|    n_updates            | 2450         |\n",
      "|    policy_gradient_loss | -0.00398     |\n",
      "|    std                  | 0.0686       |\n",
      "|    value_loss           | 1.66e+05     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 161         |\n",
      "|    ep_rew_mean          | -175        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 253         |\n",
      "|    iterations           | 47          |\n",
      "|    time_elapsed         | 379         |\n",
      "|    total_timesteps      | 96256       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008723415 |\n",
      "|    clip_fraction        | 0.0285      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.26        |\n",
      "|    explained_variance   | 0.312       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.52e+03    |\n",
      "|    n_updates            | 2460        |\n",
      "|    policy_gradient_loss | -0.00517    |\n",
      "|    std                  | 0.0681      |\n",
      "|    value_loss           | 5.18e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 180         |\n",
      "|    ep_rew_mean          | 103         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 254         |\n",
      "|    iterations           | 48          |\n",
      "|    time_elapsed         | 386         |\n",
      "|    total_timesteps      | 98304       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.024630819 |\n",
      "|    clip_fraction        | 0.0685      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 1.27        |\n",
      "|    explained_variance   | 0.934       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 64.7        |\n",
      "|    n_updates            | 2470        |\n",
      "|    policy_gradient_loss | 0.0356      |\n",
      "|    std                  | 0.0677      |\n",
      "|    value_loss           | 830         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 195        |\n",
      "|    ep_rew_mean          | 321        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 255        |\n",
      "|    iterations           | 49         |\n",
      "|    time_elapsed         | 393        |\n",
      "|    total_timesteps      | 100352     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.12989855 |\n",
      "|    clip_fraction        | 0.103      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 1.28       |\n",
      "|    explained_variance   | 0.971      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 34.5       |\n",
      "|    n_updates            | 2480       |\n",
      "|    policy_gradient_loss | 0.0586     |\n",
      "|    std                  | 0.067      |\n",
      "|    value_loss           | 184        |\n",
      "----------------------------------------\n",
      "--- 397.0252616405487 seconds ---\n"
     ]
    }
   ],
   "source": [
    "for p in [0.0, 0.1, 0.5, 0.9, 0.95, 1.0]:\n",
    "    model = PPO.load(\"model_backup/acc-2000000-64-64-64-64-100000-100000-\"+str(p))\n",
    "    model.set_env(env)\n",
    "    \n",
    "    print(\"p=\",p)\n",
    "\n",
    "    env.init_polytopes(p,retrain_polytopes)\n",
    "    start_time = time.time()\n",
    "    model=model.learn(total_timesteps=training_episode_length)\n",
    "    print(\"--- %s seconds ---\" % (time.time() - start_time))\n",
    "\n",
    "    model.save(\"model_backup/acc-2000000-64-64-64-64-100000-200000-\"+str(p))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.0\n",
      "Overall:\n",
      "mean_reward:354.64 +/- 1260.78\n",
      "Focus Polytopes:\n",
      "mean_reward:-1130.65 +/- 1517.93\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.1\n",
      "Overall:\n",
      "mean_reward:3071.47 +/- 1916.02\n",
      "Focus Polytopes:\n",
      "mean_reward:4085.63 +/- 7.26\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.5\n",
      "Overall:\n",
      "mean_reward:3612.11 +/- 1320.34\n",
      "Focus Polytopes:\n",
      "mean_reward:4093.58 +/- 1.77\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.9\n",
      "Overall:\n",
      "mean_reward:3563.95 +/- 1367.66\n",
      "Focus Polytopes:\n",
      "mean_reward:4069.02 +/- 271.79\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 0.95\n",
      "Overall:\n",
      "mean_reward:-287.43 +/- 613.73\n",
      "Focus Polytopes:\n",
      "mean_reward:-1974.47 +/- 44.27\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n",
      "p= 1.0\n",
      "Overall:\n",
      "mean_reward:-1620.53 +/- 1250.05\n",
      "Focus Polytopes:\n",
      "mean_reward:-780.57 +/- 2438.86\n"
     ]
    }
   ],
   "source": [
    "# Performance of models on focus polytopes only?\n",
    "for p in [0.0, 0.1, 0.5, 0.9, 0.95, 1.0]:\n",
    "    model = PPO.load(\"model_backup/acc-2000000-64-64-64-64-100000-200000-\"+str(p))\n",
    "    model.set_env(env)\n",
    "    print(\"p=\",p)\n",
    "    \n",
    "    print(\"Overall:\")\n",
    "    env.init_polytopes(1.0,[])\n",
    "    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)\n",
    "    results_overall[p].append((mean_reward, std_reward))\n",
    "    print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")\n",
    "    \n",
    "    print(\"Focus Polytopes:\")\n",
    "    env.init_polytopes(0.0,retrain_polytopes)\n",
    "    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=eval_episode_length)\n",
    "    results_polys[p].append((mean_reward, std_reward))\n",
    "    print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-seed the environment and PyTorch with the same fixed value.\n",
    "# NOTE(review): presumably meant to make the runs that follow reproducible -- confirm.\n",
    "env.seed(1997)\n",
    "torch.manual_seed(1997)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# For each p, print the recorded (mean_reward, std_reward) lists:\n",
    "# first the overall results, then the focus-polytope results.\n",
    "for p in (0.0, 0.1, 0.5, 0.9, 0.95, 1.0):\n",
    "    print(\"p=\", p)\n",
    "    for result_table in (results_overall, results_polys):\n",
    "        print(result_table[p])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hard-coded evaluation results, keyed by p. Every list entry is a\n",
    "# (mean_reward, std_reward) tuple, the same layout the evaluation loop appends.\n",
    "# NOTE(review): running this cell replaces whatever results_overall and\n",
    "# results_polys currently hold; presumably it restores the results of earlier\n",
    "# rounds after a kernel restart -- confirm before re-running.\n",
    "results_overall={\n",
    "    0.0:[(2929.5005532604455, 1987.642181434125), (740.0034029740095, 2341.5728469501078)],\n",
    "    0.1:[(3418.1372081091404, 1656.1341466200433), (1866.799432562232, 2348.1654507259573)],\n",
    "    0.5:[(1932.8646950793266, 2531.421546896283), (2298.278150297642, 2471.8655599065737)],\n",
    "    0.9:[(1714.7107323160171, 2511.671795638882), (1075.2834744087459, 2571.772665344986)],\n",
    "    0.95:[(175.59871589612962, 2224.4011404884886), (2903.4489146926403, 2248.3917252352485)],\n",
    "    1.0:[(1674.6219416435956, 2785.8237821959356), (805.8681440439224, 2708.9763965474235)]\n",
    "}\n",
    "results_polys={\n",
    "    0.0:[(4012.506271957636, 42.33747226187223), (224.28019980418682, 2888.451668489511)],\n",
    "    0.1:[(3989.145850322723, 331.1198812155058), (4027.3148490834237, 38.78762979327285)],\n",
    "    0.5:[(1604.8040512683392, 2923.258400329725), (1520.517672857523, 2944.2719326856286)],\n",
    "    0.9:[(986.1459198029041, 2952.599582192741), (-128.44361815786363, 2540.549608587723)],\n",
    "    0.95:[(-1960.620536404848, 174.07861630854438), (662.739118689537, 2973.2722718776295)],\n",
    "    1.0:[(-1903.3717366616727, 737.579486103041), (-1969.3372723238467, 426.6580722390694)]\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Next, we evaluate the model stored at `model_backup/acc-2000000-64-64-64-64-100000-0.1`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "@webio": {
   "lastCommId": null,
   "lastKernelId": null
  },
  "kernelspec": {
   "display_name": "nnequiv-tf1",
   "language": "python",
   "name": "nnequiv-tf1"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
