{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gym\n",
    "\n",
    "from stable_baselines3 import PPO\n",
    "from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise\n",
    "from stable_baselines3.common.evaluation import evaluate_policy\n",
    "import time\n",
    "import numpy as np\n",
    "\n",
    "from torch import nn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using cuda device\n",
      "Wrapping the env with a `Monitor` wrapper\n",
      "Wrapping the env in a DummyVecEnv.\n"
     ]
    }
   ],
   "source": [
    "# Fixed seed so that policy initialisation and rollout sampling can be repeated\n",
    "# after a kernel restart (GPU kernels may still introduce small differences).\n",
    "SEED = 0\n",
    "\n",
    "# NOTE(review): \"acc-variant-v1\" is a custom environment id and no import in this\n",
    "# notebook visibly registers it -- confirm the registering package is imported.\n",
    "env = gym.make(\"acc-variant-v1\")\n",
    "\n",
    "# Hidden-layer sizes for the policy (pi) and value (vf) networks: 4 x 64 units each.\n",
    "architecture = [dict(pi=[64, 64, 64, 64], vf=[64, 64, 64, 64])]\n",
    "\n",
    "model = PPO(\n",
    "    \"MlpPolicy\",\n",
    "    env,\n",
    "    verbose=1,\n",
    "    seed=SEED,  # seeds the pseudo-random generators used by Stable-Baselines3\n",
    "    policy_kwargs={\"activation_fn\": nn.ReLU, \"net_arch\": architecture},\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# In this notebook's cell order the save happens before the model.learn() cell,\n",
    "# so the file written here holds the freshly initialised (untrained) policy.\n",
    "model.save(path=\"ppo_acc_relu_bigger\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:-1157.93 +/- 658.78\n"
     ]
    }
   ],
   "source": [
    "# Use a separate environment instance for evaluation\n",
    "eval_env = gym.make(\"acc-variant-v1\")\n",
    "\n",
    "# Baseline score of the untrained agent, before any call to model.learn()\n",
    "mean_reward, std_reward = evaluate_policy(\n",
    "    model,\n",
    "    eval_env,\n",
    "    n_eval_episodes=1000,\n",
    ")\n",
    "print(\"mean_reward:{:.2f} +/- {:.2f}\".format(mean_reward, std_reward))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------\n",
      "| rollout/           |           |\n",
      "|    ep_len_mean     | 36.7      |\n",
      "|    ep_rew_mean     | -1.03e+03 |\n",
      "| time/              |           |\n",
      "|    fps             | 433       |\n",
      "|    iterations      | 1         |\n",
      "|    time_elapsed    | 4         |\n",
      "|    total_timesteps | 2048      |\n",
      "----------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 39.5        |\n",
      "|    ep_rew_mean          | -967        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 330         |\n",
      "|    iterations           | 2           |\n",
      "|    time_elapsed         | 12          |\n",
      "|    total_timesteps      | 4096        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011984111 |\n",
      "|    clip_fraction        | 0.151       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.4        |\n",
      "|    explained_variance   | 0.748       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.83e+04    |\n",
      "|    n_updates            | 50          |\n",
      "|    policy_gradient_loss | -0.000608   |\n",
      "|    std                  | 0.984       |\n",
      "|    value_loss           | 3.62e+04    |\n",
      "-----------------------------------------\n",
      "--------------------------------------\n",
      "| rollout/                |          |\n",
      "|    ep_len_mean          | 41.8     |\n",
      "|    ep_rew_mean          | -948     |\n",
      "| time/                   |          |\n",
      "|    fps                  | 273      |\n",
      "|    iterations           | 3        |\n",
      "|    time_elapsed         | 22       |\n",
      "|    total_timesteps      | 6144     |\n",
      "| train/                  |          |\n",
      "|    approx_kl            | 0.018469 |\n",
      "|    clip_fraction        | 0.183    |\n",
      "|    clip_range           | 0.2      |\n",
      "|    entropy_loss         | -1.4     |\n",
      "|    explained_variance   | 0.636    |\n",
      "|    learning_rate        | 0.0003   |\n",
      "|    loss                 | 1.26e+03 |\n",
      "|    n_updates            | 60       |\n",
      "|    policy_gradient_loss | -0.00388 |\n",
      "|    std                  | 0.983    |\n",
      "|    value_loss           | 3.46e+04 |\n",
      "--------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 46.6        |\n",
      "|    ep_rew_mean          | -970        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 254         |\n",
      "|    iterations           | 4           |\n",
      "|    time_elapsed         | 32          |\n",
      "|    total_timesteps      | 8192        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.015658874 |\n",
      "|    clip_fraction        | 0.217       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.4        |\n",
      "|    explained_variance   | 0.502       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.46e+04    |\n",
      "|    n_updates            | 70          |\n",
      "|    policy_gradient_loss | -0.00707    |\n",
      "|    std                  | 0.98        |\n",
      "|    value_loss           | 4.27e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 59.1         |\n",
      "|    ep_rew_mean          | -912         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 238          |\n",
      "|    iterations           | 5            |\n",
      "|    time_elapsed         | 42           |\n",
      "|    total_timesteps      | 10240        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0121331895 |\n",
      "|    clip_fraction        | 0.182        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.39        |\n",
      "|    explained_variance   | 0.597        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 4.51e+03     |\n",
      "|    n_updates            | 80           |\n",
      "|    policy_gradient_loss | -0.00473     |\n",
      "|    std                  | 0.973        |\n",
      "|    value_loss           | 3.83e+04     |\n",
      "------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# Train PPO with a budget of 10,000 environment steps; the recorded output of this\n",
    "# cell advances in 2048-step rollouts and ends at total_timesteps = 10240.\n",
    "model = model.learn(total_timesteps=10_000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:2999.79 +/- 2293.18\n"
     ]
    }
   ],
   "source": [
    "# Score the policy again after training, reusing eval_env and the same episode\n",
    "# count as the pre-training evaluation so the two numbers are comparable.\n",
    "mean_reward, std_reward = evaluate_policy(\n",
    "    model,\n",
    "    eval_env,\n",
    "    n_eval_episodes=1000,\n",
    ")\n",
    "print(\"mean_reward:{:.2f} +/- {:.2f}\".format(mean_reward, std_reward))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 274      |\n",
      "|    ep_rew_mean     | 298      |\n",
      "| time/              |          |\n",
      "|    fps             | 499      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 4        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 284         |\n",
      "|    ep_rew_mean          | 376         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 385         |\n",
      "|    iterations           | 2           |\n",
      "|    time_elapsed         | 10          |\n",
      "|    total_timesteps      | 4096        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.019064836 |\n",
      "|    clip_fraction        | 0.279       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.38       |\n",
      "|    explained_variance   | 0.319       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 6.43e+03    |\n",
      "|    n_updates            | 100         |\n",
      "|    policy_gradient_loss | 0.019       |\n",
      "|    std                  | 0.96        |\n",
      "|    value_loss           | 1.46e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 307         |\n",
      "|    ep_rew_mean          | 361         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 343         |\n",
      "|    iterations           | 3           |\n",
      "|    time_elapsed         | 17          |\n",
      "|    total_timesteps      | 6144        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009664226 |\n",
      "|    clip_fraction        | 0.11        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.36       |\n",
      "|    explained_variance   | -0.324      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 400         |\n",
      "|    n_updates            | 110         |\n",
      "|    policy_gradient_loss | -0.00184    |\n",
      "|    std                  | 0.918       |\n",
      "|    value_loss           | 3.51e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 303          |\n",
      "|    ep_rew_mean          | 401          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 319          |\n",
      "|    iterations           | 4            |\n",
      "|    time_elapsed         | 25           |\n",
      "|    total_timesteps      | 8192         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0055952715 |\n",
      "|    clip_fraction        | 0.0421       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.33        |\n",
      "|    explained_variance   | 0.476        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 6.02e+03     |\n",
      "|    n_updates            | 120          |\n",
      "|    policy_gradient_loss | -0.00181     |\n",
      "|    std                  | 0.914        |\n",
      "|    value_loss           | 1.87e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 275         |\n",
      "|    ep_rew_mean          | 238         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 286         |\n",
      "|    iterations           | 5           |\n",
      "|    time_elapsed         | 35          |\n",
      "|    total_timesteps      | 10240       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.017075716 |\n",
      "|    clip_fraction        | 0.213       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.29       |\n",
      "|    explained_variance   | -0.735      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 103         |\n",
      "|    n_updates            | 130         |\n",
      "|    policy_gradient_loss | 0.0156      |\n",
      "|    std                  | 0.86        |\n",
      "|    value_loss           | 6.35e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 278          |\n",
      "|    ep_rew_mean          | 289          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 271          |\n",
      "|    iterations           | 6            |\n",
      "|    time_elapsed         | 45           |\n",
      "|    total_timesteps      | 12288        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0053234175 |\n",
      "|    clip_fraction        | 0.0506       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.26        |\n",
      "|    explained_variance   | 0.828        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.9e+03      |\n",
      "|    n_updates            | 140          |\n",
      "|    policy_gradient_loss | -0.0088      |\n",
      "|    std                  | 0.855        |\n",
      "|    value_loss           | 6.11e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 283         |\n",
      "|    ep_rew_mean          | 350         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 248         |\n",
      "|    iterations           | 7           |\n",
      "|    time_elapsed         | 57          |\n",
      "|    total_timesteps      | 14336       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004529842 |\n",
      "|    clip_fraction        | 0.0565      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.26       |\n",
      "|    explained_variance   | 0.772       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.79e+03    |\n",
      "|    n_updates            | 150         |\n",
      "|    policy_gradient_loss | -0.00192    |\n",
      "|    std                  | 0.85        |\n",
      "|    value_loss           | 8.83e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 281          |\n",
      "|    ep_rew_mean          | 389          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 245          |\n",
      "|    iterations           | 8            |\n",
      "|    time_elapsed         | 66           |\n",
      "|    total_timesteps      | 16384        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0029312633 |\n",
      "|    clip_fraction        | 0.0541       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.26        |\n",
      "|    explained_variance   | 0.487        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.1e+03      |\n",
      "|    n_updates            | 160          |\n",
      "|    policy_gradient_loss | -0.00227     |\n",
      "|    std                  | 0.849        |\n",
      "|    value_loss           | 9.05e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 279         |\n",
      "|    ep_rew_mean          | 416         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 239         |\n",
      "|    iterations           | 9           |\n",
      "|    time_elapsed         | 76          |\n",
      "|    total_timesteps      | 18432       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005206993 |\n",
      "|    clip_fraction        | 0.0151      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.25       |\n",
      "|    explained_variance   | 0.577       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 749         |\n",
      "|    n_updates            | 170         |\n",
      "|    policy_gradient_loss | -0.00185    |\n",
      "|    std                  | 0.846       |\n",
      "|    value_loss           | 5.21e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 280          |\n",
      "|    ep_rew_mean          | 436          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 236          |\n",
      "|    iterations           | 10           |\n",
      "|    time_elapsed         | 86           |\n",
      "|    total_timesteps      | 20480        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0032270018 |\n",
      "|    clip_fraction        | 0.0147       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.25        |\n",
      "|    explained_variance   | 0.7          |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 557          |\n",
      "|    n_updates            | 180          |\n",
      "|    policy_gradient_loss | -0.00382     |\n",
      "|    std                  | 0.844        |\n",
      "|    value_loss           | 4.49e+03     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 271        |\n",
      "|    ep_rew_mean          | 390        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 233        |\n",
      "|    iterations           | 11         |\n",
      "|    time_elapsed         | 96         |\n",
      "|    total_timesteps      | 22528      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01104001 |\n",
      "|    clip_fraction        | 0.0473     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -1.25      |\n",
      "|    explained_variance   | 0.918      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 108        |\n",
      "|    n_updates            | 190        |\n",
      "|    policy_gradient_loss | -0.00265   |\n",
      "|    std                  | 0.841      |\n",
      "|    value_loss           | 1.31e+03   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 276          |\n",
      "|    ep_rew_mean          | 449          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 230          |\n",
      "|    iterations           | 12           |\n",
      "|    time_elapsed         | 106          |\n",
      "|    total_timesteps      | 24576        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0065267347 |\n",
      "|    clip_fraction        | 0.0746       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.25        |\n",
      "|    explained_variance   | 0.412        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 7.84e+03     |\n",
      "|    n_updates            | 200          |\n",
      "|    policy_gradient_loss | -0.00822     |\n",
      "|    std                  | 0.84         |\n",
      "|    value_loss           | 2.53e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 281         |\n",
      "|    ep_rew_mean          | 487         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 228         |\n",
      "|    iterations           | 13          |\n",
      "|    time_elapsed         | 116         |\n",
      "|    total_timesteps      | 26624       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009124799 |\n",
      "|    clip_fraction        | 0.109       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.24       |\n",
      "|    explained_variance   | 0.254       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 780         |\n",
      "|    n_updates            | 210         |\n",
      "|    policy_gradient_loss | -0.00925    |\n",
      "|    std                  | 0.839       |\n",
      "|    value_loss           | 4.08e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 284          |\n",
      "|    ep_rew_mean          | 518          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 227          |\n",
      "|    iterations           | 14           |\n",
      "|    time_elapsed         | 126          |\n",
      "|    total_timesteps      | 28672        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0038339854 |\n",
      "|    clip_fraction        | 0.0689       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.24        |\n",
      "|    explained_variance   | 0.872        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 113          |\n",
      "|    n_updates            | 220          |\n",
      "|    policy_gradient_loss | -0.00352     |\n",
      "|    std                  | 0.833        |\n",
      "|    value_loss           | 1.53e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 290         |\n",
      "|    ep_rew_mean          | 574         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 224         |\n",
      "|    iterations           | 15          |\n",
      "|    time_elapsed         | 136         |\n",
      "|    total_timesteps      | 30720       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.022553045 |\n",
      "|    clip_fraction        | 0.0871      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.23       |\n",
      "|    explained_variance   | 0.835       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 56.9        |\n",
      "|    n_updates            | 230         |\n",
      "|    policy_gradient_loss | -0.00155    |\n",
      "|    std                  | 0.816       |\n",
      "|    value_loss           | 289         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 290          |\n",
      "|    ep_rew_mean          | 588          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 220          |\n",
      "|    iterations           | 16           |\n",
      "|    time_elapsed         | 148          |\n",
      "|    total_timesteps      | 32768        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0069336933 |\n",
      "|    clip_fraction        | 0.0624       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.2         |\n",
      "|    explained_variance   | 0.939        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 61.7         |\n",
      "|    n_updates            | 240          |\n",
      "|    policy_gradient_loss | -0.00761     |\n",
      "|    std                  | 0.798        |\n",
      "|    value_loss           | 251          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 287          |\n",
      "|    ep_rew_mean          | 615          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 217          |\n",
      "|    iterations           | 17           |\n",
      "|    time_elapsed         | 160          |\n",
      "|    total_timesteps      | 34816        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0058220327 |\n",
      "|    clip_fraction        | 0.0629       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.18        |\n",
      "|    explained_variance   | 0.97         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 101          |\n",
      "|    n_updates            | 250          |\n",
      "|    policy_gradient_loss | -0.000863    |\n",
      "|    std                  | 0.783        |\n",
      "|    value_loss           | 835          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 282         |\n",
      "|    ep_rew_mean          | 604         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 214         |\n",
      "|    iterations           | 18          |\n",
      "|    time_elapsed         | 171         |\n",
      "|    total_timesteps      | 36864       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007941925 |\n",
      "|    clip_fraction        | 0.0706      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.17       |\n",
      "|    explained_variance   | -0.0986     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.19e+04    |\n",
      "|    n_updates            | 260         |\n",
      "|    policy_gradient_loss | -0.00365    |\n",
      "|    std                  | 0.783       |\n",
      "|    value_loss           | 1.84e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 286         |\n",
      "|    ep_rew_mean          | 640         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 212         |\n",
      "|    iterations           | 19          |\n",
      "|    time_elapsed         | 183         |\n",
      "|    total_timesteps      | 38912       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007919725 |\n",
      "|    clip_fraction        | 0.116       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.17       |\n",
      "|    explained_variance   | 0.53        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.97e+03    |\n",
      "|    n_updates            | 270         |\n",
      "|    policy_gradient_loss | -0.0129     |\n",
      "|    std                  | 0.782       |\n",
      "|    value_loss           | 1.54e+04    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 290          |\n",
      "|    ep_rew_mean          | 683          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 210          |\n",
      "|    iterations           | 20           |\n",
      "|    time_elapsed         | 194          |\n",
      "|    total_timesteps      | 40960        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0097328685 |\n",
      "|    clip_fraction        | 0.0874       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.17        |\n",
      "|    explained_variance   | 0.894        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 414          |\n",
      "|    n_updates            | 280          |\n",
      "|    policy_gradient_loss | -0.00631     |\n",
      "|    std                  | 0.776        |\n",
      "|    value_loss           | 1.61e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 296          |\n",
      "|    ep_rew_mean          | 745          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 208          |\n",
      "|    iterations           | 21           |\n",
      "|    time_elapsed         | 206          |\n",
      "|    total_timesteps      | 43008        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0062592123 |\n",
      "|    clip_fraction        | 0.0861       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.15        |\n",
      "|    explained_variance   | 0.965        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 68.6         |\n",
      "|    n_updates            | 290          |\n",
      "|    policy_gradient_loss | 0.00462      |\n",
      "|    std                  | 0.757        |\n",
      "|    value_loss           | 452          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 300          |\n",
      "|    ep_rew_mean          | 788          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 207          |\n",
      "|    iterations           | 22           |\n",
      "|    time_elapsed         | 216          |\n",
      "|    total_timesteps      | 45056        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0025468448 |\n",
      "|    clip_fraction        | 0.0232       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.12        |\n",
      "|    explained_variance   | -1.5         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 270          |\n",
      "|    n_updates            | 300          |\n",
      "|    policy_gradient_loss | -0.00455     |\n",
      "|    std                  | 0.735        |\n",
      "|    value_loss           | 803          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 300         |\n",
      "|    ep_rew_mean          | 778         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 208         |\n",
      "|    iterations           | 23          |\n",
      "|    time_elapsed         | 226         |\n",
      "|    total_timesteps      | 47104       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.015049579 |\n",
      "|    clip_fraction        | 0.179       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.1        |\n",
      "|    explained_variance   | -0.324      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 63          |\n",
      "|    n_updates            | 310         |\n",
      "|    policy_gradient_loss | -0.0119     |\n",
      "|    std                  | 0.719       |\n",
      "|    value_loss           | 122         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 312          |\n",
      "|    ep_rew_mean          | 874          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 208          |\n",
      "|    iterations           | 24           |\n",
      "|    time_elapsed         | 235          |\n",
      "|    total_timesteps      | 49152        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0050467635 |\n",
      "|    clip_fraction        | 0.06         |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.09        |\n",
      "|    explained_variance   | 0.18         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.26e+04     |\n",
      "|    n_updates            | 320          |\n",
      "|    policy_gradient_loss | -0.0051      |\n",
      "|    std                  | 0.718        |\n",
      "|    value_loss           | 2.18e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 312         |\n",
      "|    ep_rew_mean          | 885         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 208         |\n",
      "|    iterations           | 25          |\n",
      "|    time_elapsed         | 245         |\n",
      "|    total_timesteps      | 51200       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011464922 |\n",
      "|    clip_fraction        | 0.194       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.08       |\n",
      "|    explained_variance   | -0.145      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 242         |\n",
      "|    n_updates            | 330         |\n",
      "|    policy_gradient_loss | -0.00324    |\n",
      "|    std                  | 0.711       |\n",
      "|    value_loss           | 431         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 314          |\n",
      "|    ep_rew_mean          | 901          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 208          |\n",
      "|    iterations           | 26           |\n",
      "|    time_elapsed         | 254          |\n",
      "|    total_timesteps      | 53248        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0050167595 |\n",
      "|    clip_fraction        | 0.077        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -1.06        |\n",
      "|    explained_variance   | 0.817        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 91.5         |\n",
      "|    n_updates            | 340          |\n",
      "|    policy_gradient_loss | -0.00396     |\n",
      "|    std                  | 0.687        |\n",
      "|    value_loss           | 469          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 323         |\n",
      "|    ep_rew_mean          | 990         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 27          |\n",
      "|    time_elapsed         | 264         |\n",
      "|    total_timesteps      | 55296       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010704014 |\n",
      "|    clip_fraction        | 0.0838      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -1.03       |\n",
      "|    explained_variance   | 0.647       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 49.7        |\n",
      "|    n_updates            | 350         |\n",
      "|    policy_gradient_loss | -0.001      |\n",
      "|    std                  | 0.662       |\n",
      "|    value_loss           | 1.32e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 318          |\n",
      "|    ep_rew_mean          | 963          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 209          |\n",
      "|    iterations           | 28           |\n",
      "|    time_elapsed         | 273          |\n",
      "|    total_timesteps      | 57344        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0069622668 |\n",
      "|    clip_fraction        | 0.0712       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.983       |\n",
      "|    explained_variance   | 0.0893       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 42.9         |\n",
      "|    n_updates            | 360          |\n",
      "|    policy_gradient_loss | -0.00604     |\n",
      "|    std                  | 0.631        |\n",
      "|    value_loss           | 96.8         |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 318          |\n",
      "|    ep_rew_mean          | 986          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 209          |\n",
      "|    iterations           | 29           |\n",
      "|    time_elapsed         | 283          |\n",
      "|    total_timesteps      | 59392        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0012289099 |\n",
      "|    clip_fraction        | 0.0195       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.956       |\n",
      "|    explained_variance   | 0.601        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.75e+03     |\n",
      "|    n_updates            | 370          |\n",
      "|    policy_gradient_loss | -5.63e-05    |\n",
      "|    std                  | 0.628        |\n",
      "|    value_loss           | 8.39e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 311         |\n",
      "|    ep_rew_mean          | 973         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 30          |\n",
      "|    time_elapsed         | 292         |\n",
      "|    total_timesteps      | 61440       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006703225 |\n",
      "|    clip_fraction        | 0.078       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.944      |\n",
      "|    explained_variance   | -4.79       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 702         |\n",
      "|    n_updates            | 380         |\n",
      "|    policy_gradient_loss | -0.00174    |\n",
      "|    std                  | 0.616       |\n",
      "|    value_loss           | 1.07e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 319         |\n",
      "|    ep_rew_mean          | 1.07e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 31          |\n",
      "|    time_elapsed         | 302         |\n",
      "|    total_timesteps      | 63488       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.001173464 |\n",
      "|    clip_fraction        | 0.0128      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.934      |\n",
      "|    explained_variance   | 0.128       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.77e+03    |\n",
      "|    n_updates            | 390         |\n",
      "|    policy_gradient_loss | -0.00382    |\n",
      "|    std                  | 0.615       |\n",
      "|    value_loss           | 6.37e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 321          |\n",
      "|    ep_rew_mean          | 1.11e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 210          |\n",
      "|    iterations           | 32           |\n",
      "|    time_elapsed         | 311          |\n",
      "|    total_timesteps      | 65536        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0045986576 |\n",
      "|    clip_fraction        | 0.0518       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.93        |\n",
      "|    explained_variance   | 0.25         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 112          |\n",
      "|    n_updates            | 400          |\n",
      "|    policy_gradient_loss | -0.00317     |\n",
      "|    std                  | 0.611        |\n",
      "|    value_loss           | 2.56e+03     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 320          |\n",
      "|    ep_rew_mean          | 1.12e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 210          |\n",
      "|    iterations           | 33           |\n",
      "|    time_elapsed         | 321          |\n",
      "|    total_timesteps      | 67584        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0051351176 |\n",
      "|    clip_fraction        | 0.0561       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.91        |\n",
      "|    explained_variance   | -1.34        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 51.4         |\n",
      "|    n_updates            | 410          |\n",
      "|    policy_gradient_loss | 0.00162      |\n",
      "|    std                  | 0.591        |\n",
      "|    value_loss           | 170          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 328          |\n",
      "|    ep_rew_mean          | 1.2e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 210          |\n",
      "|    iterations           | 34           |\n",
      "|    time_elapsed         | 330          |\n",
      "|    total_timesteps      | 69632        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0053071855 |\n",
      "|    clip_fraction        | 0.041        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.889       |\n",
      "|    explained_variance   | 0.578        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.11e+03     |\n",
      "|    n_updates            | 420          |\n",
      "|    policy_gradient_loss | -0.00255     |\n",
      "|    std                  | 0.586        |\n",
      "|    value_loss           | 1.05e+04     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 339        |\n",
      "|    ep_rew_mean          | 1.32e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 210        |\n",
      "|    iterations           | 35         |\n",
      "|    time_elapsed         | 340        |\n",
      "|    total_timesteps      | 71680      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01063101 |\n",
      "|    clip_fraction        | 0.109      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.858     |\n",
      "|    explained_variance   | -0.357     |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 36.1       |\n",
      "|    n_updates            | 430        |\n",
      "|    policy_gradient_loss | -0.00824   |\n",
      "|    std                  | 0.559      |\n",
      "|    value_loss           | 99.8       |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 343          |\n",
      "|    ep_rew_mean          | 1.38e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 210          |\n",
      "|    iterations           | 36           |\n",
      "|    time_elapsed         | 349          |\n",
      "|    total_timesteps      | 73728        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0060965894 |\n",
      "|    clip_fraction        | 0.0551       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.811       |\n",
      "|    explained_variance   | 0.0316       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 46.9         |\n",
      "|    n_updates            | 440          |\n",
      "|    policy_gradient_loss | -0.00746     |\n",
      "|    std                  | 0.53         |\n",
      "|    value_loss           | 111          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 343          |\n",
      "|    ep_rew_mean          | 1.41e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 210          |\n",
      "|    iterations           | 37           |\n",
      "|    time_elapsed         | 359          |\n",
      "|    total_timesteps      | 75776        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0038534217 |\n",
      "|    clip_fraction        | 0.0358       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.777       |\n",
      "|    explained_variance   | 0.284        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 133          |\n",
      "|    n_updates            | 450          |\n",
      "|    policy_gradient_loss | -0.0028      |\n",
      "|    std                  | 0.521        |\n",
      "|    value_loss           | 1.59e+03     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 19.7       |\n",
      "|    ep_rew_mean          | -970       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 211        |\n",
      "|    iterations           | 38         |\n",
      "|    time_elapsed         | 368        |\n",
      "|    total_timesteps      | 77824      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.24875751 |\n",
      "|    clip_fraction        | 0.12       |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.752     |\n",
      "|    explained_variance   | 0.803      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 878        |\n",
      "|    n_updates            | 460        |\n",
      "|    policy_gradient_loss | 0.00689    |\n",
      "|    std                  | 0.504      |\n",
      "|    value_loss           | 1.49e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 24          |\n",
      "|    ep_rew_mean          | -883        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 211         |\n",
      "|    iterations           | 39          |\n",
      "|    time_elapsed         | 378         |\n",
      "|    total_timesteps      | 79872       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.003245651 |\n",
      "|    clip_fraction        | 0.0382      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.733      |\n",
      "|    explained_variance   | 0.501       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 3.14e+03    |\n",
      "|    n_updates            | 470         |\n",
      "|    policy_gradient_loss | -0.00095    |\n",
      "|    std                  | 0.504       |\n",
      "|    value_loss           | 5.06e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 22.9        |\n",
      "|    ep_rew_mean          | -961        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 210         |\n",
      "|    iterations           | 40          |\n",
      "|    time_elapsed         | 389         |\n",
      "|    total_timesteps      | 81920       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004391187 |\n",
      "|    clip_fraction        | 0.0472      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.733      |\n",
      "|    explained_variance   | 0.755       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.54e+04    |\n",
      "|    n_updates            | 480         |\n",
      "|    policy_gradient_loss | -0.00189    |\n",
      "|    std                  | 0.503       |\n",
      "|    value_loss           | 2.94e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 28.3        |\n",
      "|    ep_rew_mean          | -887        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 41          |\n",
      "|    time_elapsed         | 401         |\n",
      "|    total_timesteps      | 83968       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004969407 |\n",
      "|    clip_fraction        | 0.0352      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.732      |\n",
      "|    explained_variance   | 0.798       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 8.17e+03    |\n",
      "|    n_updates            | 490         |\n",
      "|    policy_gradient_loss | -0.0036     |\n",
      "|    std                  | 0.503       |\n",
      "|    value_loss           | 3.61e+04    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 25.7         |\n",
      "|    ep_rew_mean          | -867         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 208          |\n",
      "|    iterations           | 42           |\n",
      "|    time_elapsed         | 412          |\n",
      "|    total_timesteps      | 86016        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018128871 |\n",
      "|    clip_fraction        | 0.0191       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.731       |\n",
      "|    explained_variance   | 0.846        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 9.47e+03     |\n",
      "|    n_updates            | 500          |\n",
      "|    policy_gradient_loss | 0.000233     |\n",
      "|    std                  | 0.503        |\n",
      "|    value_loss           | 2.14e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 27.4         |\n",
      "|    ep_rew_mean          | -876         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 207          |\n",
      "|    iterations           | 43           |\n",
      "|    time_elapsed         | 424          |\n",
      "|    total_timesteps      | 88064        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0040624957 |\n",
      "|    clip_fraction        | 0.0567       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.731       |\n",
      "|    explained_variance   | 0.844        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 9.71e+03     |\n",
      "|    n_updates            | 510          |\n",
      "|    policy_gradient_loss | -0.00218     |\n",
      "|    std                  | 0.502        |\n",
      "|    value_loss           | 1.38e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 30.3        |\n",
      "|    ep_rew_mean          | -878        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 205         |\n",
      "|    iterations           | 44          |\n",
      "|    time_elapsed         | 437         |\n",
      "|    total_timesteps      | 90112       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.007076977 |\n",
      "|    clip_fraction        | 0.0858      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.73       |\n",
      "|    explained_variance   | 0.904       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.06e+03    |\n",
      "|    n_updates            | 520         |\n",
      "|    policy_gradient_loss | -0.0044     |\n",
      "|    std                  | 0.502       |\n",
      "|    value_loss           | 9.12e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 39          |\n",
      "|    ep_rew_mean          | -843        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 204         |\n",
      "|    iterations           | 45          |\n",
      "|    time_elapsed         | 450         |\n",
      "|    total_timesteps      | 92160       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.015077235 |\n",
      "|    clip_fraction        | 0.195       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.729      |\n",
      "|    explained_variance   | 0.825       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.73e+03    |\n",
      "|    n_updates            | 530         |\n",
      "|    policy_gradient_loss | -0.00836    |\n",
      "|    std                  | 0.501       |\n",
      "|    value_loss           | 1.88e+04    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 53.5        |\n",
      "|    ep_rew_mean          | -744        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 203         |\n",
      "|    iterations           | 46          |\n",
      "|    time_elapsed         | 463         |\n",
      "|    total_timesteps      | 94208       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.028438361 |\n",
      "|    clip_fraction        | 0.217       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.727      |\n",
      "|    explained_variance   | 0.899       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.91e+03    |\n",
      "|    n_updates            | 540         |\n",
      "|    policy_gradient_loss | -0.0184     |\n",
      "|    std                  | 0.5         |\n",
      "|    value_loss           | 7.6e+03     |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 72.5        |\n",
      "|    ep_rew_mean          | -572        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 203         |\n",
      "|    iterations           | 47          |\n",
      "|    time_elapsed         | 474         |\n",
      "|    total_timesteps      | 96256       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.031687833 |\n",
      "|    clip_fraction        | 0.271       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.725      |\n",
      "|    explained_variance   | 0.866       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.09e+03    |\n",
      "|    n_updates            | 550         |\n",
      "|    policy_gradient_loss | -0.00271    |\n",
      "|    std                  | 0.499       |\n",
      "|    value_loss           | 5.22e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 92          |\n",
      "|    ep_rew_mean          | -402        |\n",
      "| time/                   |             |\n",
      "|    fps                  | 203         |\n",
      "|    iterations           | 48          |\n",
      "|    time_elapsed         | 483         |\n",
      "|    total_timesteps      | 98304       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.012278962 |\n",
      "|    clip_fraction        | 0.142       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.716      |\n",
      "|    explained_variance   | 0.698       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.71e+03    |\n",
      "|    n_updates            | 560         |\n",
      "|    policy_gradient_loss | -0.00636    |\n",
      "|    std                  | 0.491       |\n",
      "|    value_loss           | 3.54e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 110        |\n",
      "|    ep_rew_mean          | -233       |\n",
      "| time/                   |            |\n",
      "|    fps                  | 203        |\n",
      "|    iterations           | 49         |\n",
      "|    time_elapsed         | 493        |\n",
      "|    total_timesteps      | 100352     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.04918643 |\n",
      "|    clip_fraction        | 0.139      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.677     |\n",
      "|    explained_variance   | 0.683      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 234        |\n",
      "|    n_updates            | 570        |\n",
      "|    policy_gradient_loss | -0.00126   |\n",
      "|    std                  | 0.463      |\n",
      "|    value_loss           | 1.62e+03   |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 129         |\n",
      "|    ep_rew_mean          | -69.1       |\n",
      "| time/                   |             |\n",
      "|    fps                  | 203         |\n",
      "|    iterations           | 50          |\n",
      "|    time_elapsed         | 502         |\n",
      "|    total_timesteps      | 102400      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011730123 |\n",
      "|    clip_fraction        | 0.0679      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.628      |\n",
      "|    explained_variance   | 0.904       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 94.4        |\n",
      "|    n_updates            | 580         |\n",
      "|    policy_gradient_loss | 0.000141    |\n",
      "|    std                  | 0.444       |\n",
      "|    value_loss           | 1.09e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 147          |\n",
      "|    ep_rew_mean          | 86.4         |\n",
      "| time/                   |              |\n",
      "|    fps                  | 203          |\n",
      "|    iterations           | 51           |\n",
      "|    time_elapsed         | 512          |\n",
      "|    total_timesteps      | 104448       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0073347124 |\n",
      "|    clip_fraction        | 0.124        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.604       |\n",
      "|    explained_variance   | -0.164       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.45e+03     |\n",
      "|    n_updates            | 590          |\n",
      "|    policy_gradient_loss | 0.00814      |\n",
      "|    std                  | 0.441        |\n",
      "|    value_loss           | 5.33e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 165        |\n",
      "|    ep_rew_mean          | 248        |\n",
      "| time/                   |            |\n",
      "|    fps                  | 204        |\n",
      "|    iterations           | 52         |\n",
      "|    time_elapsed         | 521        |\n",
      "|    total_timesteps      | 106496     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01604343 |\n",
      "|    clip_fraction        | 0.0626     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.595     |\n",
      "|    explained_variance   | -2.88      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 121        |\n",
      "|    n_updates            | 600        |\n",
      "|    policy_gradient_loss | -0.00142   |\n",
      "|    std                  | 0.435      |\n",
      "|    value_loss           | 705        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 182         |\n",
      "|    ep_rew_mean          | 401         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 204         |\n",
      "|    iterations           | 53          |\n",
      "|    time_elapsed         | 531         |\n",
      "|    total_timesteps      | 108544      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006072307 |\n",
      "|    clip_fraction        | 0.143       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.574      |\n",
      "|    explained_variance   | -0.27       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 62.4        |\n",
      "|    n_updates            | 610         |\n",
      "|    policy_gradient_loss | 0.00562     |\n",
      "|    std                  | 0.424       |\n",
      "|    value_loss           | 315         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 201          |\n",
      "|    ep_rew_mean          | 578          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 204          |\n",
      "|    iterations           | 54           |\n",
      "|    time_elapsed         | 540          |\n",
      "|    total_timesteps      | 110592       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0070029665 |\n",
      "|    clip_fraction        | 0.0745       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.56        |\n",
      "|    explained_variance   | 0.373        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.7e+04      |\n",
      "|    n_updates            | 620          |\n",
      "|    policy_gradient_loss | -0.0101      |\n",
      "|    std                  | 0.423        |\n",
      "|    value_loss           | 2.93e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 219         |\n",
      "|    ep_rew_mean          | 725         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 204         |\n",
      "|    iterations           | 55          |\n",
      "|    time_elapsed         | 550         |\n",
      "|    total_timesteps      | 112640      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.012728394 |\n",
      "|    clip_fraction        | 0.135       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.558      |\n",
      "|    explained_variance   | 0.702       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 212         |\n",
      "|    n_updates            | 630         |\n",
      "|    policy_gradient_loss | -0.0148     |\n",
      "|    std                  | 0.422       |\n",
      "|    value_loss           | 3.36e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 233         |\n",
      "|    ep_rew_mean          | 838         |\n",
      "| time/                   |             |\n",
      "|    fps                  | 204         |\n",
      "|    iterations           | 56          |\n",
      "|    time_elapsed         | 560         |\n",
      "|    total_timesteps      | 114688      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004623296 |\n",
      "|    clip_fraction        | 0.0639      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.547      |\n",
      "|    explained_variance   | 0.892       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 71.2        |\n",
      "|    n_updates            | 640         |\n",
      "|    policy_gradient_loss | -0.00531    |\n",
      "|    std                  | 0.415       |\n",
      "|    value_loss           | 1.04e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 249          |\n",
      "|    ep_rew_mean          | 964          |\n",
      "| time/                   |              |\n",
      "|    fps                  | 204          |\n",
      "|    iterations           | 57           |\n",
      "|    time_elapsed         | 571          |\n",
      "|    total_timesteps      | 116736       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0042057475 |\n",
      "|    clip_fraction        | 0.0378       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.532       |\n",
      "|    explained_variance   | 0.981        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 36.7         |\n",
      "|    n_updates            | 650          |\n",
      "|    policy_gradient_loss | 0.000275     |\n",
      "|    std                  | 0.408        |\n",
      "|    value_loss           | 344          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 268         |\n",
      "|    ep_rew_mean          | 1.15e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 204         |\n",
      "|    iterations           | 58          |\n",
      "|    time_elapsed         | 581         |\n",
      "|    total_timesteps      | 118784      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010620185 |\n",
      "|    clip_fraction        | 0.047       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.506      |\n",
      "|    explained_variance   | 0.985       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 194         |\n",
      "|    n_updates            | 660         |\n",
      "|    policy_gradient_loss | -0.00756    |\n",
      "|    std                  | 0.395       |\n",
      "|    value_loss           | 561         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 283          |\n",
      "|    ep_rew_mean          | 1.3e+03      |\n",
      "| time/                   |              |\n",
      "|    fps                  | 204          |\n",
      "|    iterations           | 59           |\n",
      "|    time_elapsed         | 590          |\n",
      "|    total_timesteps      | 120832       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0062152906 |\n",
      "|    clip_fraction        | 0.0751       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.474       |\n",
      "|    explained_variance   | -1.66        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 37.9         |\n",
      "|    n_updates            | 670          |\n",
      "|    policy_gradient_loss | -0.0068      |\n",
      "|    std                  | 0.382        |\n",
      "|    value_loss           | 239          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 295          |\n",
      "|    ep_rew_mean          | 1.45e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 204          |\n",
      "|    iterations           | 60           |\n",
      "|    time_elapsed         | 600          |\n",
      "|    total_timesteps      | 122880       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0074992804 |\n",
      "|    clip_fraction        | 0.0501       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.445       |\n",
      "|    explained_variance   | 0.6          |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 49.2         |\n",
      "|    n_updates            | 680          |\n",
      "|    policy_gradient_loss | -0.00397     |\n",
      "|    std                  | 0.372        |\n",
      "|    value_loss           | 4.14e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 309         |\n",
      "|    ep_rew_mean          | 1.57e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 204         |\n",
      "|    iterations           | 61          |\n",
      "|    time_elapsed         | 611         |\n",
      "|    total_timesteps      | 124928      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006299083 |\n",
      "|    clip_fraction        | 0.0628      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.419      |\n",
      "|    explained_variance   | 0.735       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 40.3        |\n",
      "|    n_updates            | 690         |\n",
      "|    policy_gradient_loss | -0.00703    |\n",
      "|    std                  | 0.363       |\n",
      "|    value_loss           | 1.52e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 317          |\n",
      "|    ep_rew_mean          | 1.67e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 203          |\n",
      "|    iterations           | 62           |\n",
      "|    time_elapsed         | 622          |\n",
      "|    total_timesteps      | 126976       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0069336975 |\n",
      "|    clip_fraction        | 0.0901       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.394       |\n",
      "|    explained_variance   | 0.98         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 29.4         |\n",
      "|    n_updates            | 700          |\n",
      "|    policy_gradient_loss | -0.00126     |\n",
      "|    std                  | 0.353        |\n",
      "|    value_loss           | 107          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 291         |\n",
      "|    ep_rew_mean          | 1.42e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 203         |\n",
      "|    iterations           | 63          |\n",
      "|    time_elapsed         | 634         |\n",
      "|    total_timesteps      | 129024      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.030767497 |\n",
      "|    clip_fraction        | 0.061       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.372      |\n",
      "|    explained_variance   | -2.25       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 50.3        |\n",
      "|    n_updates            | 710         |\n",
      "|    policy_gradient_loss | -0.000148   |\n",
      "|    std                  | 0.348       |\n",
      "|    value_loss           | 136         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 291         |\n",
      "|    ep_rew_mean          | 1.44e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 202         |\n",
      "|    iterations           | 64          |\n",
      "|    time_elapsed         | 646         |\n",
      "|    total_timesteps      | 131072      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008784693 |\n",
      "|    clip_fraction        | 0.0983      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.364      |\n",
      "|    explained_variance   | 0.522       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.61e+04    |\n",
      "|    n_updates            | 720         |\n",
      "|    policy_gradient_loss | -0.0103     |\n",
      "|    std                  | 0.348       |\n",
      "|    value_loss           | 4.64e+04    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 287         |\n",
      "|    ep_rew_mean          | 1.41e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 202         |\n",
      "|    iterations           | 65          |\n",
      "|    time_elapsed         | 658         |\n",
      "|    total_timesteps      | 133120      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.013338587 |\n",
      "|    clip_fraction        | 0.169       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.362      |\n",
      "|    explained_variance   | 0.916       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 488         |\n",
      "|    n_updates            | 730         |\n",
      "|    policy_gradient_loss | -0.021      |\n",
      "|    std                  | 0.347       |\n",
      "|    value_loss           | 1.37e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 287         |\n",
      "|    ep_rew_mean          | 1.42e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 201         |\n",
      "|    iterations           | 66          |\n",
      "|    time_elapsed         | 670         |\n",
      "|    total_timesteps      | 135168      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009974139 |\n",
      "|    clip_fraction        | 0.173       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.354      |\n",
      "|    explained_variance   | 0.951       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 140         |\n",
      "|    n_updates            | 740         |\n",
      "|    policy_gradient_loss | -0.0118     |\n",
      "|    std                  | 0.343       |\n",
      "|    value_loss           | 428         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 297         |\n",
      "|    ep_rew_mean          | 1.54e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 201         |\n",
      "|    iterations           | 67          |\n",
      "|    time_elapsed         | 681         |\n",
      "|    total_timesteps      | 137216      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008793489 |\n",
      "|    clip_fraction        | 0.0758      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.337      |\n",
      "|    explained_variance   | -0.376      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 47.5        |\n",
      "|    n_updates            | 750         |\n",
      "|    policy_gradient_loss | -0.00668    |\n",
      "|    std                  | 0.335       |\n",
      "|    value_loss           | 116         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 308         |\n",
      "|    ep_rew_mean          | 1.66e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 201         |\n",
      "|    iterations           | 68          |\n",
      "|    time_elapsed         | 692         |\n",
      "|    total_timesteps      | 139264      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.014610907 |\n",
      "|    clip_fraction        | 0.119       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.296      |\n",
      "|    explained_variance   | 0.122       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 19.2        |\n",
      "|    n_updates            | 760         |\n",
      "|    policy_gradient_loss | -0.00675    |\n",
      "|    std                  | 0.319       |\n",
      "|    value_loss           | 66.2        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 319         |\n",
      "|    ep_rew_mean          | 1.77e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 200         |\n",
      "|    iterations           | 69          |\n",
      "|    time_elapsed         | 704         |\n",
      "|    total_timesteps      | 141312      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010754393 |\n",
      "|    clip_fraction        | 0.11        |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.261      |\n",
      "|    explained_variance   | 0.97        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 117         |\n",
      "|    n_updates            | 770         |\n",
      "|    policy_gradient_loss | -0.00326    |\n",
      "|    std                  | 0.309       |\n",
      "|    value_loss           | 210         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 319          |\n",
      "|    ep_rew_mean          | 1.79e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 200          |\n",
      "|    iterations           | 70           |\n",
      "|    time_elapsed         | 715          |\n",
      "|    total_timesteps      | 143360       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0038937733 |\n",
      "|    clip_fraction        | 0.0589       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.238       |\n",
      "|    explained_variance   | 0.69         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 107          |\n",
      "|    n_updates            | 780          |\n",
      "|    policy_gradient_loss | -0.00307     |\n",
      "|    std                  | 0.305        |\n",
      "|    value_loss           | 8.82e+03     |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 323        |\n",
      "|    ep_rew_mean          | 1.86e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 200        |\n",
      "|    iterations           | 71         |\n",
      "|    time_elapsed         | 726        |\n",
      "|    total_timesteps      | 145408     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00591149 |\n",
      "|    clip_fraction        | 0.0684     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.212     |\n",
      "|    explained_variance   | -0.0222    |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 15.6       |\n",
      "|    n_updates            | 790        |\n",
      "|    policy_gradient_loss | -0.0081    |\n",
      "|    std                  | 0.294      |\n",
      "|    value_loss           | 59.4       |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 311         |\n",
      "|    ep_rew_mean          | 1.78e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 200         |\n",
      "|    iterations           | 72          |\n",
      "|    time_elapsed         | 736         |\n",
      "|    total_timesteps      | 147456      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.020039342 |\n",
      "|    clip_fraction        | 0.0698      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.183      |\n",
      "|    explained_variance   | 0.979       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 25.2        |\n",
      "|    n_updates            | 800         |\n",
      "|    policy_gradient_loss | -0.00364    |\n",
      "|    std                  | 0.287       |\n",
      "|    value_loss           | 347         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 266          |\n",
      "|    ep_rew_mean          | 1.34e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 199          |\n",
      "|    iterations           | 73           |\n",
      "|    time_elapsed         | 747          |\n",
      "|    total_timesteps      | 149504       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0027511748 |\n",
      "|    clip_fraction        | 0.02         |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.17        |\n",
      "|    explained_variance   | 0.134        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.58e+04     |\n",
      "|    n_updates            | 810          |\n",
      "|    policy_gradient_loss | -0.00229     |\n",
      "|    std                  | 0.287        |\n",
      "|    value_loss           | 5.97e+04     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 264          |\n",
      "|    ep_rew_mean          | 1.35e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 199          |\n",
      "|    iterations           | 74           |\n",
      "|    time_elapsed         | 758          |\n",
      "|    total_timesteps      | 151552       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0076812664 |\n",
      "|    clip_fraction        | 0.0756       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.17        |\n",
      "|    explained_variance   | 0.788        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 8.12e+03     |\n",
      "|    n_updates            | 820          |\n",
      "|    policy_gradient_loss | -0.0104      |\n",
      "|    std                  | 0.287        |\n",
      "|    value_loss           | 2.45e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 264         |\n",
      "|    ep_rew_mean          | 1.36e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 199         |\n",
      "|    iterations           | 75          |\n",
      "|    time_elapsed         | 768         |\n",
      "|    total_timesteps      | 153600      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009820415 |\n",
      "|    clip_fraction        | 0.103       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.169      |\n",
      "|    explained_variance   | 0.438       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 5.15e+03    |\n",
      "|    n_updates            | 830         |\n",
      "|    policy_gradient_loss | -0.0138     |\n",
      "|    std                  | 0.287       |\n",
      "|    value_loss           | 1.6e+04     |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 274         |\n",
      "|    ep_rew_mean          | 1.49e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 199         |\n",
      "|    iterations           | 76          |\n",
      "|    time_elapsed         | 779         |\n",
      "|    total_timesteps      | 155648      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008014189 |\n",
      "|    clip_fraction        | 0.128       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.166      |\n",
      "|    explained_variance   | -0.5        |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 215         |\n",
      "|    n_updates            | 840         |\n",
      "|    policy_gradient_loss | -0.0057     |\n",
      "|    std                  | 0.285       |\n",
      "|    value_loss           | 574         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 287        |\n",
      "|    ep_rew_mean          | 1.63e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 199        |\n",
      "|    iterations           | 77         |\n",
      "|    time_elapsed         | 789        |\n",
      "|    total_timesteps      | 157696     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.43490613 |\n",
      "|    clip_fraction        | 0.136      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.16      |\n",
      "|    explained_variance   | 0.925      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 85.9       |\n",
      "|    n_updates            | 850        |\n",
      "|    policy_gradient_loss | 0.00158    |\n",
      "|    std                  | 0.283      |\n",
      "|    value_loss           | 569        |\n",
      "----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 291        |\n",
      "|    ep_rew_mean          | 1.69e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 199        |\n",
      "|    iterations           | 78         |\n",
      "|    time_elapsed         | 800        |\n",
      "|    total_timesteps      | 159744     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00523775 |\n",
      "|    clip_fraction        | 0.0769     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | -0.145     |\n",
      "|    explained_variance   | -14.6      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 18.7       |\n",
      "|    n_updates            | 860        |\n",
      "|    policy_gradient_loss | 0.00422    |\n",
      "|    std                  | 0.275      |\n",
      "|    value_loss           | 366        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 291         |\n",
      "|    ep_rew_mean          | 1.71e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 199         |\n",
      "|    iterations           | 79          |\n",
      "|    time_elapsed         | 810         |\n",
      "|    total_timesteps      | 161792      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.027603101 |\n",
      "|    clip_fraction        | 0.166       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.122      |\n",
      "|    explained_variance   | 0.674       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 29.4        |\n",
      "|    n_updates            | 870         |\n",
      "|    policy_gradient_loss | 0.0053      |\n",
      "|    std                  | 0.271       |\n",
      "|    value_loss           | 609         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 277         |\n",
      "|    ep_rew_mean          | 1.55e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 199         |\n",
      "|    iterations           | 80          |\n",
      "|    time_elapsed         | 821         |\n",
      "|    total_timesteps      | 163840      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008379262 |\n",
      "|    clip_fraction        | 0.0633      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.107      |\n",
      "|    explained_variance   | -0.132      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 22          |\n",
      "|    n_updates            | 880         |\n",
      "|    policy_gradient_loss | -0.00606    |\n",
      "|    std                  | 0.267       |\n",
      "|    value_loss           | 85.2        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 273          |\n",
      "|    ep_rew_mean          | 1.51e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 199          |\n",
      "|    iterations           | 81           |\n",
      "|    time_elapsed         | 833          |\n",
      "|    total_timesteps      | 165888       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0030336783 |\n",
      "|    clip_fraction        | 0.0207       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.0959      |\n",
      "|    explained_variance   | 0.777        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.1e+04      |\n",
      "|    n_updates            | 890          |\n",
      "|    policy_gradient_loss | 0.00275      |\n",
      "|    std                  | 0.266        |\n",
      "|    value_loss           | 2.71e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 277         |\n",
      "|    ep_rew_mean          | 1.57e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 199         |\n",
      "|    iterations           | 82          |\n",
      "|    time_elapsed         | 843         |\n",
      "|    total_timesteps      | 167936      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.004911857 |\n",
      "|    clip_fraction        | 0.0581      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.0888     |\n",
      "|    explained_variance   | 0.961       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 32.9        |\n",
      "|    n_updates            | 900         |\n",
      "|    policy_gradient_loss | -0.00164    |\n",
      "|    std                  | 0.262       |\n",
      "|    value_loss           | 95.5        |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 273       |\n",
      "|    ep_rew_mean          | 1.53e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 198       |\n",
      "|    iterations           | 83        |\n",
      "|    time_elapsed         | 854       |\n",
      "|    total_timesteps      | 169984    |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.6147424 |\n",
      "|    clip_fraction        | 0.164     |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | -0.0676   |\n",
      "|    explained_variance   | -0.0772   |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 36.5      |\n",
      "|    n_updates            | 910       |\n",
      "|    policy_gradient_loss | 0.00853   |\n",
      "|    std                  | 0.258     |\n",
      "|    value_loss           | 52.8      |\n",
      "---------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 269          |\n",
      "|    ep_rew_mean          | 1.48e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 198          |\n",
      "|    iterations           | 84           |\n",
      "|    time_elapsed         | 864          |\n",
      "|    total_timesteps      | 172032       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0043838993 |\n",
      "|    clip_fraction        | 0.0919       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.0611      |\n",
      "|    explained_variance   | 0.891        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 35.2         |\n",
      "|    n_updates            | 920          |\n",
      "|    policy_gradient_loss | 0.000635     |\n",
      "|    std                  | 0.257        |\n",
      "|    value_loss           | 6.16e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 281         |\n",
      "|    ep_rew_mean          | 1.61e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 198         |\n",
      "|    iterations           | 85          |\n",
      "|    time_elapsed         | 875         |\n",
      "|    total_timesteps      | 174080      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.053008918 |\n",
      "|    clip_fraction        | 0.0502      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.0551     |\n",
      "|    explained_variance   | 0.987       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 26.2        |\n",
      "|    n_updates            | 930         |\n",
      "|    policy_gradient_loss | -0.00732    |\n",
      "|    std                  | 0.255       |\n",
      "|    value_loss           | 379         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 282         |\n",
      "|    ep_rew_mean          | 1.63e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 198         |\n",
      "|    iterations           | 86          |\n",
      "|    time_elapsed         | 886         |\n",
      "|    total_timesteps      | 176128      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011216045 |\n",
      "|    clip_fraction        | 0.265       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.0402     |\n",
      "|    explained_variance   | -0.0136     |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 18.9        |\n",
      "|    n_updates            | 940         |\n",
      "|    policy_gradient_loss | 0.0572      |\n",
      "|    std                  | 0.25        |\n",
      "|    value_loss           | 48.3        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 297         |\n",
      "|    ep_rew_mean          | 1.79e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 198         |\n",
      "|    iterations           | 87          |\n",
      "|    time_elapsed         | 897         |\n",
      "|    total_timesteps      | 178176      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005582522 |\n",
      "|    clip_fraction        | 0.0443      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | -0.0329     |\n",
      "|    explained_variance   | 0.868       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.6e+03     |\n",
      "|    n_updates            | 950         |\n",
      "|    policy_gradient_loss | -0.00517    |\n",
      "|    std                  | 0.25        |\n",
      "|    value_loss           | 8.3e+03     |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 312          |\n",
      "|    ep_rew_mean          | 1.97e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 198          |\n",
      "|    iterations           | 88           |\n",
      "|    time_elapsed         | 909          |\n",
      "|    total_timesteps      | 180224       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0039609955 |\n",
      "|    clip_fraction        | 0.0634       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.0227      |\n",
      "|    explained_variance   | 0.00576      |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 77.7         |\n",
      "|    n_updates            | 960          |\n",
      "|    policy_gradient_loss | -0.00928     |\n",
      "|    std                  | 0.246        |\n",
      "|    value_loss           | 105          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 321          |\n",
      "|    ep_rew_mean          | 2.06e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 198          |\n",
      "|    iterations           | 89           |\n",
      "|    time_elapsed         | 920          |\n",
      "|    total_timesteps      | 182272       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0061425995 |\n",
      "|    clip_fraction        | 0.0522       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | -0.0113      |\n",
      "|    explained_variance   | 0.952        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 29.1         |\n",
      "|    n_updates            | 970          |\n",
      "|    policy_gradient_loss | -0.00278     |\n",
      "|    std                  | 0.243        |\n",
      "|    value_loss           | 447          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 317          |\n",
      "|    ep_rew_mean          | 2e+03        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 197          |\n",
      "|    iterations           | 90           |\n",
      "|    time_elapsed         | 932          |\n",
      "|    total_timesteps      | 184320       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0067885574 |\n",
      "|    clip_fraction        | 0.0604       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.00728      |\n",
      "|    explained_variance   | 0.973        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 9.68         |\n",
      "|    n_updates            | 980          |\n",
      "|    policy_gradient_loss | 0.00103      |\n",
      "|    std                  | 0.237        |\n",
      "|    value_loss           | 340          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 325          |\n",
      "|    ep_rew_mean          | 2.11e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 197          |\n",
      "|    iterations           | 91           |\n",
      "|    time_elapsed         | 944          |\n",
      "|    total_timesteps      | 186368       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0028046458 |\n",
      "|    clip_fraction        | 0.0169       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.0323       |\n",
      "|    explained_variance   | 0.947        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 25.1         |\n",
      "|    n_updates            | 990          |\n",
      "|    policy_gradient_loss | -0.00163     |\n",
      "|    std                  | 0.231        |\n",
      "|    value_loss           | 467          |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 317         |\n",
      "|    ep_rew_mean          | 2.03e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 197         |\n",
      "|    iterations           | 92          |\n",
      "|    time_elapsed         | 954         |\n",
      "|    total_timesteps      | 188416      |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.010945013 |\n",
      "|    clip_fraction        | 0.0819      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.0635      |\n",
      "|    explained_variance   | -0.116      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 13.1        |\n",
      "|    n_updates            | 1000        |\n",
      "|    policy_gradient_loss | -0.00646    |\n",
      "|    std                  | 0.224       |\n",
      "|    value_loss           | 37.9        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 313          |\n",
      "|    ep_rew_mean          | 1.99e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 197          |\n",
      "|    iterations           | 93           |\n",
      "|    time_elapsed         | 965          |\n",
      "|    total_timesteps      | 190464       |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0008761134 |\n",
      "|    clip_fraction        | 0.00322      |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.0797       |\n",
      "|    explained_variance   | 0.745        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.34e+03     |\n",
      "|    n_updates            | 1010         |\n",
      "|    policy_gradient_loss | -0.000802    |\n",
      "|    std                  | 0.223        |\n",
      "|    value_loss           | 6.66e+03     |\n",
      "------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "model = model.learn(total_timesteps=190000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:3034.17 +/- 2227.16\n"
     ]
    }
   ],
   "source": [
    "# Agent, after ~200,000 training steps\n",
    "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=1000)\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save(\"ppo_acc_bigger_200000_steps\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "---------------------------------\n",
      "| rollout/           |          |\n",
      "|    ep_len_mean     | 329      |\n",
      "|    ep_rew_mean     | 2.29e+03 |\n",
      "| time/              |          |\n",
      "|    fps             | 354      |\n",
      "|    iterations      | 1        |\n",
      "|    time_elapsed    | 5        |\n",
      "|    total_timesteps | 2048     |\n",
      "---------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 286          |\n",
      "|    ep_rew_mean          | 1.71e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 256          |\n",
      "|    iterations           | 2            |\n",
      "|    time_elapsed         | 15           |\n",
      "|    total_timesteps      | 4096         |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0018355271 |\n",
      "|    clip_fraction        | 0.0541       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.0966       |\n",
      "|    explained_variance   | 0.995        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 8.44         |\n",
      "|    n_updates            | 1030         |\n",
      "|    policy_gradient_loss | 0.00243      |\n",
      "|    std                  | 0.218        |\n",
      "|    value_loss           | 63.6         |\n",
      "------------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 304        |\n",
      "|    ep_rew_mean          | 1.94e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 234        |\n",
      "|    iterations           | 3          |\n",
      "|    time_elapsed         | 26         |\n",
      "|    total_timesteps      | 6144       |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.01089158 |\n",
      "|    clip_fraction        | 0.0966     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.113      |\n",
      "|    explained_variance   | 0.998      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 154        |\n",
      "|    n_updates            | 1040       |\n",
      "|    policy_gradient_loss | -0.0026    |\n",
      "|    std                  | 0.214      |\n",
      "|    value_loss           | 142        |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 254         |\n",
      "|    ep_rew_mean          | 1.28e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 223         |\n",
      "|    iterations           | 4           |\n",
      "|    time_elapsed         | 36          |\n",
      "|    total_timesteps      | 8192        |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006984965 |\n",
      "|    clip_fraction        | 0.167       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.139       |\n",
      "|    explained_variance   | 0.674       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 24.8        |\n",
      "|    n_updates            | 1050        |\n",
      "|    policy_gradient_loss | 0.0184      |\n",
      "|    std                  | 0.208       |\n",
      "|    value_loss           | 437         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 248        |\n",
      "|    ep_rew_mean          | 1.22e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 218        |\n",
      "|    iterations           | 5          |\n",
      "|    time_elapsed         | 46         |\n",
      "|    total_timesteps      | 10240      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.05414567 |\n",
      "|    clip_fraction        | 0.0583     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.156      |\n",
      "|    explained_variance   | 0.855      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 72.8       |\n",
      "|    n_updates            | 1060       |\n",
      "|    policy_gradient_loss | 7.14e-05   |\n",
      "|    std                  | 0.207      |\n",
      "|    value_loss           | 1.57e+03   |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 266          |\n",
      "|    ep_rew_mean          | 1.46e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 214          |\n",
      "|    iterations           | 6            |\n",
      "|    time_elapsed         | 57           |\n",
      "|    total_timesteps      | 12288        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0038215509 |\n",
      "|    clip_fraction        | 0.0661       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.171        |\n",
      "|    explained_variance   | 0.999        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 49.5         |\n",
      "|    n_updates            | 1070         |\n",
      "|    policy_gradient_loss | 0.00335      |\n",
      "|    std                  | 0.201        |\n",
      "|    value_loss           | 74.7         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 271         |\n",
      "|    ep_rew_mean          | 1.55e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 214         |\n",
      "|    iterations           | 7           |\n",
      "|    time_elapsed         | 66          |\n",
      "|    total_timesteps      | 14336       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.017797392 |\n",
      "|    clip_fraction        | 0.0671      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.198       |\n",
      "|    explained_variance   | -11.6       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 20.9        |\n",
      "|    n_updates            | 1080        |\n",
      "|    policy_gradient_loss | -0.00234    |\n",
      "|    std                  | 0.196       |\n",
      "|    value_loss           | 69.2        |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 283          |\n",
      "|    ep_rew_mean          | 1.71e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 213          |\n",
      "|    iterations           | 8            |\n",
      "|    time_elapsed         | 76           |\n",
      "|    total_timesteps      | 16384        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0074317236 |\n",
      "|    clip_fraction        | 0.0207       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.211        |\n",
      "|    explained_variance   | 0.731        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 3.32e+03     |\n",
      "|    n_updates            | 1090         |\n",
      "|    policy_gradient_loss | 0.00155      |\n",
      "|    std                  | 0.196        |\n",
      "|    value_loss           | 8.45e+03     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 280         |\n",
      "|    ep_rew_mean          | 1.67e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 213         |\n",
      "|    iterations           | 9           |\n",
      "|    time_elapsed         | 86          |\n",
      "|    total_timesteps      | 18432       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.021463396 |\n",
      "|    clip_fraction        | 0.0933      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.212       |\n",
      "|    explained_variance   | -0.376      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 96.9        |\n",
      "|    n_updates            | 1100        |\n",
      "|    policy_gradient_loss | -0.00687    |\n",
      "|    std                  | 0.196       |\n",
      "|    value_loss           | 469         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 259        |\n",
      "|    ep_rew_mean          | 1.35e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 213        |\n",
      "|    iterations           | 10         |\n",
      "|    time_elapsed         | 95         |\n",
      "|    total_timesteps      | 20480      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.17814037 |\n",
      "|    clip_fraction        | 0.124      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.215      |\n",
      "|    explained_variance   | 0.968      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 28.1       |\n",
      "|    n_updates            | 1110       |\n",
      "|    policy_gradient_loss | -0.0046    |\n",
      "|    std                  | 0.194      |\n",
      "|    value_loss           | 122        |\n",
      "----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 261          |\n",
      "|    ep_rew_mean          | 1.36e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 213          |\n",
      "|    iterations           | 11           |\n",
      "|    time_elapsed         | 105          |\n",
      "|    total_timesteps      | 22528        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0035123213 |\n",
      "|    clip_fraction        | 0.0283       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.219        |\n",
      "|    explained_variance   | 0.507        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.83e+03     |\n",
      "|    n_updates            | 1120         |\n",
      "|    policy_gradient_loss | -0.0037      |\n",
      "|    std                  | 0.194        |\n",
      "|    value_loss           | 2.12e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 257         |\n",
      "|    ep_rew_mean          | 1.32e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 213         |\n",
      "|    iterations           | 12          |\n",
      "|    time_elapsed         | 115         |\n",
      "|    total_timesteps      | 24576       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.011432875 |\n",
      "|    clip_fraction        | 0.206       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.219       |\n",
      "|    explained_variance   | 0.928       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 549         |\n",
      "|    n_updates            | 1130        |\n",
      "|    policy_gradient_loss | -0.0132     |\n",
      "|    std                  | 0.194       |\n",
      "|    value_loss           | 4.83e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 249          |\n",
      "|    ep_rew_mean          | 1.22e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 213          |\n",
      "|    iterations           | 13           |\n",
      "|    time_elapsed         | 124          |\n",
      "|    total_timesteps      | 26624        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0052536456 |\n",
      "|    clip_fraction        | 0.0249       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.219        |\n",
      "|    explained_variance   | 0.83         |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 393          |\n",
      "|    n_updates            | 1140         |\n",
      "|    policy_gradient_loss | -0.00267     |\n",
      "|    std                  | 0.194        |\n",
      "|    value_loss           | 1.32e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 249         |\n",
      "|    ep_rew_mean          | 1.22e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 213         |\n",
      "|    iterations           | 14          |\n",
      "|    time_elapsed         | 134         |\n",
      "|    total_timesteps      | 28672       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.016378175 |\n",
      "|    clip_fraction        | 0.106       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.22        |\n",
      "|    explained_variance   | 0.991       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 504         |\n",
      "|    n_updates            | 1150        |\n",
      "|    policy_gradient_loss | -0.00984    |\n",
      "|    std                  | 0.194       |\n",
      "|    value_loss           | 684         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 249          |\n",
      "|    ep_rew_mean          | 1.23e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 213          |\n",
      "|    iterations           | 15           |\n",
      "|    time_elapsed         | 143          |\n",
      "|    total_timesteps      | 30720        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0076448363 |\n",
      "|    clip_fraction        | 0.0778       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.228        |\n",
      "|    explained_variance   | 0.995        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 21.3         |\n",
      "|    n_updates            | 1160         |\n",
      "|    policy_gradient_loss | -0.00129     |\n",
      "|    std                  | 0.191        |\n",
      "|    value_loss           | 171          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 253         |\n",
      "|    ep_rew_mean          | 1.29e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 213         |\n",
      "|    iterations           | 16          |\n",
      "|    time_elapsed         | 153         |\n",
      "|    total_timesteps      | 32768       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.015651155 |\n",
      "|    clip_fraction        | 0.144       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.242       |\n",
      "|    explained_variance   | 0.992       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 50.8        |\n",
      "|    n_updates            | 1170        |\n",
      "|    policy_gradient_loss | -0.00875    |\n",
      "|    std                  | 0.189       |\n",
      "|    value_loss           | 202         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 253          |\n",
      "|    ep_rew_mean          | 1.29e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 213          |\n",
      "|    iterations           | 17           |\n",
      "|    time_elapsed         | 163          |\n",
      "|    total_timesteps      | 34816        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0034369892 |\n",
      "|    clip_fraction        | 0.0561       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.26         |\n",
      "|    explained_variance   | 0.997        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 9.27         |\n",
      "|    n_updates            | 1180         |\n",
      "|    policy_gradient_loss | 0.00319      |\n",
      "|    std                  | 0.185        |\n",
      "|    value_loss           | 143          |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 241          |\n",
      "|    ep_rew_mean          | 1.14e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 213          |\n",
      "|    iterations           | 18           |\n",
      "|    time_elapsed         | 172          |\n",
      "|    total_timesteps      | 36864        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0052868216 |\n",
      "|    clip_fraction        | 0.0771       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.28         |\n",
      "|    explained_variance   | 0.998        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 159          |\n",
      "|    n_updates            | 1190         |\n",
      "|    policy_gradient_loss | 0.00261      |\n",
      "|    std                  | 0.181        |\n",
      "|    value_loss           | 195          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 249         |\n",
      "|    ep_rew_mean          | 1.23e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 212         |\n",
      "|    iterations           | 19          |\n",
      "|    time_elapsed         | 183         |\n",
      "|    total_timesteps      | 38912       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.025973491 |\n",
      "|    clip_fraction        | 0.0722      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.295       |\n",
      "|    explained_variance   | 0.995       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 93.4        |\n",
      "|    n_updates            | 1200        |\n",
      "|    policy_gradient_loss | 0.0118      |\n",
      "|    std                  | 0.179       |\n",
      "|    value_loss           | 130         |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 236         |\n",
      "|    ep_rew_mean          | 1.08e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 211         |\n",
      "|    iterations           | 20          |\n",
      "|    time_elapsed         | 193         |\n",
      "|    total_timesteps      | 40960       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.029134642 |\n",
      "|    clip_fraction        | 0.118       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.317       |\n",
      "|    explained_variance   | 0.272       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 10.4        |\n",
      "|    n_updates            | 1210        |\n",
      "|    policy_gradient_loss | -0.000315   |\n",
      "|    std                  | 0.174       |\n",
      "|    value_loss           | 47.8        |\n",
      "-----------------------------------------\n",
      "---------------------------------------\n",
      "| rollout/                |           |\n",
      "|    ep_len_mean          | 241       |\n",
      "|    ep_rew_mean          | 1.14e+03  |\n",
      "| time/                   |           |\n",
      "|    fps                  | 212       |\n",
      "|    iterations           | 21        |\n",
      "|    time_elapsed         | 202       |\n",
      "|    total_timesteps      | 43008     |\n",
      "| train/                  |           |\n",
      "|    approx_kl            | 0.0062469 |\n",
      "|    clip_fraction        | 0.0695    |\n",
      "|    clip_range           | 0.2       |\n",
      "|    entropy_loss         | 0.332     |\n",
      "|    explained_variance   | 0.994     |\n",
      "|    learning_rate        | 0.0003    |\n",
      "|    loss                 | 155       |\n",
      "|    n_updates            | 1220      |\n",
      "|    policy_gradient_loss | -0.00481  |\n",
      "|    std                  | 0.173     |\n",
      "|    value_loss           | 308       |\n",
      "---------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 249        |\n",
      "|    ep_rew_mean          | 1.27e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 211        |\n",
      "|    iterations           | 22         |\n",
      "|    time_elapsed         | 212        |\n",
      "|    total_timesteps      | 45056      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.00511006 |\n",
      "|    clip_fraction        | 0.154      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.345      |\n",
      "|    explained_variance   | 0.39       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 106        |\n",
      "|    n_updates            | 1230       |\n",
      "|    policy_gradient_loss | 0.0142     |\n",
      "|    std                  | 0.171      |\n",
      "|    value_loss           | 98.2       |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 259         |\n",
      "|    ep_rew_mean          | 1.44e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 212         |\n",
      "|    iterations           | 23          |\n",
      "|    time_elapsed         | 222         |\n",
      "|    total_timesteps      | 47104       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.005272828 |\n",
      "|    clip_fraction        | 0.0737      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.35        |\n",
      "|    explained_variance   | 0.976       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 29.5        |\n",
      "|    n_updates            | 1240        |\n",
      "|    policy_gradient_loss | 0.000815    |\n",
      "|    std                  | 0.17        |\n",
      "|    value_loss           | 1.55e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 253         |\n",
      "|    ep_rew_mean          | 1.36e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 211         |\n",
      "|    iterations           | 24          |\n",
      "|    time_elapsed         | 231         |\n",
      "|    total_timesteps      | 49152       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009685521 |\n",
      "|    clip_fraction        | 0.0407      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.356       |\n",
      "|    explained_variance   | 0.964       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 11.3        |\n",
      "|    n_updates            | 1250        |\n",
      "|    policy_gradient_loss | -0.000444   |\n",
      "|    std                  | 0.169       |\n",
      "|    value_loss           | 195         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 249         |\n",
      "|    ep_rew_mean          | 1.31e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 211         |\n",
      "|    iterations           | 25          |\n",
      "|    time_elapsed         | 241         |\n",
      "|    total_timesteps      | 51200       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.017586615 |\n",
      "|    clip_fraction        | 0.0531      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.362       |\n",
      "|    explained_variance   | 0.794       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 206         |\n",
      "|    n_updates            | 1260        |\n",
      "|    policy_gradient_loss | 0.000458    |\n",
      "|    std                  | 0.168       |\n",
      "|    value_loss           | 4.97e+03    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 257        |\n",
      "|    ep_rew_mean          | 1.42e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 211        |\n",
      "|    iterations           | 26         |\n",
      "|    time_elapsed         | 251        |\n",
      "|    total_timesteps      | 53248      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02542887 |\n",
      "|    clip_fraction        | 0.139      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.371      |\n",
      "|    explained_variance   | 0.998      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 7.43       |\n",
      "|    n_updates            | 1270       |\n",
      "|    policy_gradient_loss | -0.00309   |\n",
      "|    std                  | 0.166      |\n",
      "|    value_loss           | 84.5       |\n",
      "----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 253         |\n",
      "|    ep_rew_mean          | 1.37e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 211         |\n",
      "|    iterations           | 27          |\n",
      "|    time_elapsed         | 261         |\n",
      "|    total_timesteps      | 55296       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.014868987 |\n",
      "|    clip_fraction        | 0.0627      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.384       |\n",
      "|    explained_variance   | 0.999       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 6.99        |\n",
      "|    n_updates            | 1280        |\n",
      "|    policy_gradient_loss | -0.00573    |\n",
      "|    std                  | 0.164       |\n",
      "|    value_loss           | 109         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 257         |\n",
      "|    ep_rew_mean          | 1.43e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 211         |\n",
      "|    iterations           | 28          |\n",
      "|    time_elapsed         | 270         |\n",
      "|    total_timesteps      | 57344       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.055503435 |\n",
      "|    clip_fraction        | 0.101       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.394       |\n",
      "|    explained_variance   | 0.768       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 4.88e+03    |\n",
      "|    n_updates            | 1290        |\n",
      "|    policy_gradient_loss | -0.00224    |\n",
      "|    std                  | 0.163       |\n",
      "|    value_loss           | 5.87e+03    |\n",
      "-----------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 261         |\n",
      "|    ep_rew_mean          | 1.49e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 29          |\n",
      "|    time_elapsed         | 282         |\n",
      "|    total_timesteps      | 59392       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.021533055 |\n",
      "|    clip_fraction        | 0.135       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.397       |\n",
      "|    explained_variance   | 0.253       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 223         |\n",
      "|    n_updates            | 1300        |\n",
      "|    policy_gradient_loss | 0.0164      |\n",
      "|    std                  | 0.162       |\n",
      "|    value_loss           | 4.86e+03    |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 265         |\n",
      "|    ep_rew_mean          | 1.54e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 210         |\n",
      "|    iterations           | 30          |\n",
      "|    time_elapsed         | 292         |\n",
      "|    total_timesteps      | 61440       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.008745292 |\n",
      "|    clip_fraction        | 0.0811      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.408       |\n",
      "|    explained_variance   | 0.933       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 15.3        |\n",
      "|    n_updates            | 1310        |\n",
      "|    policy_gradient_loss | 0.0153      |\n",
      "|    std                  | 0.16        |\n",
      "|    value_loss           | 670         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 273          |\n",
      "|    ep_rew_mean          | 1.66e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 210          |\n",
      "|    iterations           | 31           |\n",
      "|    time_elapsed         | 302          |\n",
      "|    total_timesteps      | 63488        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0091923615 |\n",
      "|    clip_fraction        | 0.062        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.422        |\n",
      "|    explained_variance   | 0.958        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 85.3         |\n",
      "|    n_updates            | 1320         |\n",
      "|    policy_gradient_loss | -0.00387     |\n",
      "|    std                  | 0.158        |\n",
      "|    value_loss           | 595          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 281         |\n",
      "|    ep_rew_mean          | 1.77e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 32          |\n",
      "|    time_elapsed         | 312         |\n",
      "|    total_timesteps      | 65536       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009354741 |\n",
      "|    clip_fraction        | 0.089       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.437       |\n",
      "|    explained_variance   | -24         |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 13.9        |\n",
      "|    n_updates            | 1330        |\n",
      "|    policy_gradient_loss | 0.00173     |\n",
      "|    std                  | 0.155       |\n",
      "|    value_loss           | 733         |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 281          |\n",
      "|    ep_rew_mean          | 1.77e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 209          |\n",
      "|    iterations           | 33           |\n",
      "|    time_elapsed         | 322          |\n",
      "|    total_timesteps      | 67584        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0055734143 |\n",
      "|    clip_fraction        | 0.0945       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.457        |\n",
      "|    explained_variance   | -11.3        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 11.3         |\n",
      "|    n_updates            | 1340         |\n",
      "|    policy_gradient_loss | 0.00107      |\n",
      "|    std                  | 0.152        |\n",
      "|    value_loss           | 340          |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 289         |\n",
      "|    ep_rew_mean          | 1.88e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 34          |\n",
      "|    time_elapsed         | 332         |\n",
      "|    total_timesteps      | 69632       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.021202398 |\n",
      "|    clip_fraction        | 0.079       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.482       |\n",
      "|    explained_variance   | 0.811       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 7.1         |\n",
      "|    n_updates            | 1350        |\n",
      "|    policy_gradient_loss | -0.000406   |\n",
      "|    std                  | 0.148       |\n",
      "|    value_loss           | 315         |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 277        |\n",
      "|    ep_rew_mean          | 1.72e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 209        |\n",
      "|    iterations           | 35         |\n",
      "|    time_elapsed         | 341        |\n",
      "|    total_timesteps      | 71680      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.02524174 |\n",
      "|    clip_fraction        | 0.227      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.501      |\n",
      "|    explained_variance   | 0.97       |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 75.7       |\n",
      "|    n_updates            | 1360       |\n",
      "|    policy_gradient_loss | 0.0425     |\n",
      "|    std                  | 0.146      |\n",
      "|    value_loss           | 368        |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 289          |\n",
      "|    ep_rew_mean          | 1.89e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 209          |\n",
      "|    iterations           | 36           |\n",
      "|    time_elapsed         | 351          |\n",
      "|    total_timesteps      | 73728        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0066146743 |\n",
      "|    clip_fraction        | 0.132        |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.511        |\n",
      "|    explained_variance   | 0.995        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 6.11         |\n",
      "|    n_updates            | 1370         |\n",
      "|    policy_gradient_loss | 0.00551      |\n",
      "|    std                  | 0.144        |\n",
      "|    value_loss           | 82.6         |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 293          |\n",
      "|    ep_rew_mean          | 1.95e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 209          |\n",
      "|    iterations           | 37           |\n",
      "|    time_elapsed         | 361          |\n",
      "|    total_timesteps      | 75776        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0039055091 |\n",
      "|    clip_fraction        | 0.0248       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.524        |\n",
      "|    explained_variance   | -0.349       |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 52.9         |\n",
      "|    n_updates            | 1380         |\n",
      "|    policy_gradient_loss | -0.00659     |\n",
      "|    std                  | 0.143        |\n",
      "|    value_loss           | 56.8         |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 301        |\n",
      "|    ep_rew_mean          | 2.07e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 209        |\n",
      "|    iterations           | 38         |\n",
      "|    time_elapsed         | 371        |\n",
      "|    total_timesteps      | 77824      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.10465785 |\n",
      "|    clip_fraction        | 0.149      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.535      |\n",
      "|    explained_variance   | 0.998      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 3.3        |\n",
      "|    n_updates            | 1390       |\n",
      "|    policy_gradient_loss | 0.036      |\n",
      "|    std                  | 0.141      |\n",
      "|    value_loss           | 89.7       |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 293          |\n",
      "|    ep_rew_mean          | 1.96e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 209          |\n",
      "|    iterations           | 39           |\n",
      "|    time_elapsed         | 381          |\n",
      "|    total_timesteps      | 79872        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0055854097 |\n",
      "|    clip_fraction        | 0.0917       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.552        |\n",
      "|    explained_variance   | 0.998        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 11.9         |\n",
      "|    n_updates            | 1400         |\n",
      "|    policy_gradient_loss | 0.0083       |\n",
      "|    std                  | 0.138        |\n",
      "|    value_loss           | 89.4         |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 289         |\n",
      "|    ep_rew_mean          | 1.9e+03     |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 40          |\n",
      "|    time_elapsed         | 391         |\n",
      "|    total_timesteps      | 81920       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.041125886 |\n",
      "|    clip_fraction        | 0.163       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.567       |\n",
      "|    explained_variance   | 0.906       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 167         |\n",
      "|    n_updates            | 1410        |\n",
      "|    policy_gradient_loss | 0.0149      |\n",
      "|    std                  | 0.137       |\n",
      "|    value_loss           | 1.28e+03    |\n",
      "-----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 286          |\n",
      "|    ep_rew_mean          | 1.88e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 209          |\n",
      "|    iterations           | 41           |\n",
      "|    time_elapsed         | 401          |\n",
      "|    total_timesteps      | 83968        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0023567213 |\n",
      "|    clip_fraction        | 0.0264       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.572        |\n",
      "|    explained_variance   | 0.833        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 1.33e+04     |\n",
      "|    n_updates            | 1420         |\n",
      "|    policy_gradient_loss | 0.000165     |\n",
      "|    std                  | 0.137        |\n",
      "|    value_loss           | 1.29e+04     |\n",
      "------------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 294          |\n",
      "|    ep_rew_mean          | 2e+03        |\n",
      "| time/                   |              |\n",
      "|    fps                  | 209          |\n",
      "|    iterations           | 42           |\n",
      "|    time_elapsed         | 411          |\n",
      "|    total_timesteps      | 86016        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0060936185 |\n",
      "|    clip_fraction        | 0.0259       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.572        |\n",
      "|    explained_variance   | 0.758        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 2.73e+03     |\n",
      "|    n_updates            | 1430         |\n",
      "|    policy_gradient_loss | -0.00293     |\n",
      "|    std                  | 0.137        |\n",
      "|    value_loss           | 1.58e+04     |\n",
      "------------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 282         |\n",
      "|    ep_rew_mean          | 1.86e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 43          |\n",
      "|    time_elapsed         | 420         |\n",
      "|    total_timesteps      | 88064       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.009098411 |\n",
      "|    clip_fraction        | 0.135       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.573       |\n",
      "|    explained_variance   | 0.209       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 158         |\n",
      "|    n_updates            | 1440        |\n",
      "|    policy_gradient_loss | -0.0155     |\n",
      "|    std                  | 0.136       |\n",
      "|    value_loss           | 395         |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 282         |\n",
      "|    ep_rew_mean          | 1.86e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 44          |\n",
      "|    time_elapsed         | 430         |\n",
      "|    total_timesteps      | 90112       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.006351363 |\n",
      "|    clip_fraction        | 0.0404      |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.575       |\n",
      "|    explained_variance   | 0.872       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 1.33e+04    |\n",
      "|    n_updates            | 1450        |\n",
      "|    policy_gradient_loss | -0.00465    |\n",
      "|    std                  | 0.136       |\n",
      "|    value_loss           | 1.43e+04    |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 275        |\n",
      "|    ep_rew_mean          | 1.76e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 209        |\n",
      "|    iterations           | 45         |\n",
      "|    time_elapsed         | 440        |\n",
      "|    total_timesteps      | 92160      |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.08526982 |\n",
      "|    clip_fraction        | 0.183      |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.577      |\n",
      "|    explained_variance   | 0.955      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 54.3       |\n",
      "|    n_updates            | 1460       |\n",
      "|    policy_gradient_loss | -0.00692   |\n",
      "|    std                  | 0.136      |\n",
      "|    value_loss           | 862        |\n",
      "----------------------------------------\n",
      "------------------------------------------\n",
      "| rollout/                |              |\n",
      "|    ep_len_mean          | 279          |\n",
      "|    ep_rew_mean          | 1.82e+03     |\n",
      "| time/                   |              |\n",
      "|    fps                  | 209          |\n",
      "|    iterations           | 46           |\n",
      "|    time_elapsed         | 449          |\n",
      "|    total_timesteps      | 94208        |\n",
      "| train/                  |              |\n",
      "|    approx_kl            | 0.0031426614 |\n",
      "|    clip_fraction        | 0.0584       |\n",
      "|    clip_range           | 0.2          |\n",
      "|    entropy_loss         | 0.582        |\n",
      "|    explained_variance   | 0.762        |\n",
      "|    learning_rate        | 0.0003       |\n",
      "|    loss                 | 48           |\n",
      "|    n_updates            | 1470         |\n",
      "|    policy_gradient_loss | 0.00211      |\n",
      "|    std                  | 0.135        |\n",
      "|    value_loss           | 4.73e+03     |\n",
      "------------------------------------------\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 263         |\n",
      "|    ep_rew_mean          | 1.62e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 47          |\n",
      "|    time_elapsed         | 459         |\n",
      "|    total_timesteps      | 96256       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.022834638 |\n",
      "|    clip_fraction        | 0.199       |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.593       |\n",
      "|    explained_variance   | -0.136      |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 18.1        |\n",
      "|    n_updates            | 1480        |\n",
      "|    policy_gradient_loss | 0.0209      |\n",
      "|    std                  | 0.133       |\n",
      "|    value_loss           | 49.9        |\n",
      "-----------------------------------------\n",
      "-----------------------------------------\n",
      "| rollout/                |             |\n",
      "|    ep_len_mean          | 255         |\n",
      "|    ep_rew_mean          | 1.52e+03    |\n",
      "| time/                   |             |\n",
      "|    fps                  | 209         |\n",
      "|    iterations           | 48          |\n",
      "|    time_elapsed         | 469         |\n",
      "|    total_timesteps      | 98304       |\n",
      "| train/                  |             |\n",
      "|    approx_kl            | 0.015790146 |\n",
      "|    clip_fraction        | 0.00859     |\n",
      "|    clip_range           | 0.2         |\n",
      "|    entropy_loss         | 0.6         |\n",
      "|    explained_variance   | 0.842       |\n",
      "|    learning_rate        | 0.0003      |\n",
      "|    loss                 | 2.04e+04    |\n",
      "|    n_updates            | 1490        |\n",
      "|    policy_gradient_loss | -0.000195   |\n",
      "|    std                  | 0.133       |\n",
      "|    value_loss           | 2e+04       |\n",
      "-----------------------------------------\n",
      "----------------------------------------\n",
      "| rollout/                |            |\n",
      "|    ep_len_mean          | 251        |\n",
      "|    ep_rew_mean          | 1.48e+03   |\n",
      "| time/                   |            |\n",
      "|    fps                  | 209        |\n",
      "|    iterations           | 49         |\n",
      "|    time_elapsed         | 479        |\n",
      "|    total_timesteps      | 100352     |\n",
      "| train/                  |            |\n",
      "|    approx_kl            | 0.05579745 |\n",
      "|    clip_fraction        | 0.0552     |\n",
      "|    clip_range           | 0.2        |\n",
      "|    entropy_loss         | 0.603      |\n",
      "|    explained_variance   | 0.974      |\n",
      "|    learning_rate        | 0.0003     |\n",
      "|    loss                 | 638        |\n",
      "|    n_updates            | 1500       |\n",
      "|    policy_gradient_loss | -0.000318  |\n",
      "|    std                  | 0.132      |\n",
      "|    value_loss           | 839        |\n",
      "----------------------------------------\n"
     ]
    }
   ],
   "source": [
    "model=model.learn(total_timesteps=100000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mean_reward:2240.32 +/- 2689.60\n"
     ]
    }
   ],
   "source": [
    "# Agent, after 300,000 steps\n",
    "mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=1000)\n",
    "print(f\"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save(\"ppo_acc_bigger_300000_steps\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "nnequiv-tf1",
   "language": "python",
   "name": "nnequiv-tf1"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
