{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
      "/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
      "/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
      "/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
      "/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
      "/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n",
      "/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
      "/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
      "/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
      "/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
      "/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
      "/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "import re\n",
    "import multiprocessing\n",
    "import os.path as osp\n",
    "import gym\n",
    "from collections import defaultdict\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import shlex\n",
    "\n",
    "from baselines.common.vec_env import VecFrameStack, VecNormalize, VecEnv\n",
    "from baselines.common.vec_env.vec_video_recorder import VecVideoRecorder\n",
    "from baselines.common.cmd_util import common_arg_parser, parse_unknown_args, make_vec_env, make_env\n",
    "from baselines.common.tf_util import get_session\n",
    "from baselines import logger\n",
    "from importlib import import_module\n",
    "\n",
    "try:\n",
    "    from mpi4py import MPI\n",
    "except ImportError:\n",
    "    MPI = None\n",
    "\n",
    "try:\n",
    "    import pybullet_envs\n",
    "except ImportError:\n",
    "    pybullet_envs = None\n",
    "\n",
    "try:\n",
    "    import roboschool\n",
    "except ImportError:\n",
    "    roboschool = None\n",
    "\n",
    "_game_envs = defaultdict(set)\n",
    "for env in gym.envs.registry.all():\n",
    "    # TODO: solve this with regexes\n",
    "    env_type = env.entry_point.split(':')[0].split('.')[-1]\n",
    "    _game_envs[env_type].add(env.id)\n",
    "\n",
    "# reading benchmark names directly from retro requires\n",
    "# importing retro here, and for some reason that crashes tensorflow\n",
    "# in ubuntu\n",
    "_game_envs['retro'] = {\n",
    "    'BubbleBobble-Nes',\n",
    "    'SuperMarioBros-Nes',\n",
    "    'TwinBee3PokoPokoDaimaou-Nes',\n",
    "    'SpaceHarrier-Nes',\n",
    "    'SonicTheHedgehog-Genesis',\n",
    "    'Vectorman-Genesis',\n",
    "    'FinalFight-Snes',\n",
    "    'SpaceInvaders-Snes',\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Logging to /tmp/openai-2020-09-10-18-34-05-208462\n",
      "env_type: mujoco\n",
      "WARNING:tensorflow:From /home/yunze/project/aril/openai_baselines/baselines/common/tf_util.py:53: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.\n",
      "\n",
      "WARNING:tensorflow:From /home/yunze/project/aril/openai_baselines/baselines/common/tf_util.py:70: The name tf.InteractiveSession is deprecated. Please use tf.compat.v1.InteractiveSession instead.\n",
      "\n",
      "WARNING:tensorflow:From /home/yunze/project/aril/openai_baselines/baselines/common/misc_util.py:58: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.\n",
      "\n",
      "WARNING:tensorflow:From /home/yunze/project/aril/openai_baselines/baselines/common/running_mean_std.py:45: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n",
      "\n",
      "WARNING:tensorflow:From /home/yunze/project/aril/openai_baselines/baselines/common/running_mean_std.py:50: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.\n",
      "\n",
      "Training ppo2 on mujoco:Hopper-v2 with arguments \n",
      "{'nsteps': 2048, 'nminibatches': 32, 'lam': 0.95, 'gamma': 0.99, 'noptepochs': 10, 'log_interval': 1, 'ent_coef': 0.0, 'lr': <function mujoco.<locals>.<lambda> at 0x7f6be8093488>, 'cliprange': 0.2, 'value_network': 'copy', 'load_path': '/home/yunze/project/aril/openai_baselines/models/hopper_20M_ppo2_norm', 'network': 'mlp'}\n",
      "WARNING:tensorflow:From /home/yunze/project/aril/openai_baselines/baselines/common/mpi_adam_optimizer.py:11: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.\n",
      "\n",
      "WARNING:tensorflow:From /home/yunze/project/aril/openai_baselines/baselines/common/input.py:57: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use `tf.cast` instead.\n",
      "WARNING:tensorflow:From /home/yunze/project/aril/openai_baselines/baselines/common/models.py:94: flatten (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use keras.layers.flatten instead.\n",
      "WARNING:tensorflow:From /home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use tf.where in 2.0, which has the same broadcast rule as np.where\n",
      "Running trained model\n",
      "GLFW error (code %d): %s 65544 b'X11: The DISPLAY environment variable is missing'\n",
      "GLFW error (code %d): %s 65544 b'X11: The DISPLAY environment variable is missing'\n"
     ]
    },
    {
     "ename": "GlfwError",
     "evalue": "Failed to initialize GLFW",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m--------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mGlfwError\u001b[0m                    Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-2-81371bb88b51>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m    184\u001b[0m         \u001b[0mobs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrew\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mactions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    185\u001b[0m         \u001b[0mepisode_rew\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mrew\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 186\u001b[0;31m         \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    187\u001b[0m         \u001b[0mdone_any\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdone\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mdone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    188\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mdone_any\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/yunze/project/aril/openai_baselines/baselines/common/vec_env/vec_env.py\u001b[0m in \u001b[0;36mrender\u001b[0;34m(self, mode)\u001b[0m\n\u001b[1;32m    165\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    166\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'human'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 167\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvenv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    168\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    169\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mget_images\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/yunze/project/aril/openai_baselines/baselines/common/vec_env/dummy_vec_env.py\u001b[0m in \u001b[0;36mrender\u001b[0;34m(self, mode)\u001b[0m\n\u001b[1;32m     77\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'human'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     78\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_envs\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 79\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menvs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     80\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     81\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/gym/core.py\u001b[0m in \u001b[0;36mrender\u001b[0;34m(self, mode, **kwargs)\u001b[0m\n\u001b[1;32m    233\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    234\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'human'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 235\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    236\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    237\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/gym/core.py\u001b[0m in \u001b[0;36mrender\u001b[0;34m(self, mode, **kwargs)\u001b[0m\n\u001b[1;32m    233\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    234\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'human'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 235\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    236\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    237\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/gym/core.py\u001b[0m in \u001b[0;36mrender\u001b[0;34m(self, mode, **kwargs)\u001b[0m\n\u001b[1;32m    233\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    234\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'human'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 235\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    236\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    237\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/gym/envs/mujoco/mujoco_env.py\u001b[0m in \u001b[0;36mrender\u001b[0;34m(self, mode, width, height, camera_id, camera_name)\u001b[0m\n\u001b[1;32m    156\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    157\u001b[0m         \u001b[0;32melif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'human'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 158\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_viewer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    159\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    160\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/yunze/anaconda3/envs/aril/lib/python3.6/site-packages/gym/envs/mujoco/mujoco_env.py\u001b[0m in \u001b[0;36m_get_viewer\u001b[0;34m(self, mode)\u001b[0m\n\u001b[1;32m    168\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mviewer\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    169\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'human'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 170\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mviewer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmujoco_py\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMjViewer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    171\u001b[0m             \u001b[0;32melif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'rgb_array'\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'depth_array'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    172\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mviewer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmujoco_py\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMjRenderContextOffscreen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msim\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/yunze/.local/lib/python3.6/site-packages/mujoco_py/mjviewer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, sim)\u001b[0m\n\u001b[1;32m    135\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    136\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 137\u001b[0;31m         \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    138\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    139\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ncam\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msim\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mncam\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/yunze/.local/lib/python3.6/site-packages/mujoco_py/mjviewer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, sim)\u001b[0m\n\u001b[1;32m     26\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     27\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 28\u001b[0;31m         \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     29\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     30\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_gui_lock\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/yunze/.local/lib/python3.6/site-packages/mujoco_py/mjrendercontext.pyx\u001b[0m in \u001b[0;36mmujoco_py.cymj.MjRenderContextWindow.__init__\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m/home/yunze/.local/lib/python3.6/site-packages/mujoco_py/mjrendercontext.pyx\u001b[0m in \u001b[0;36mmujoco_py.cymj.MjRenderContext.__init__\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m/home/yunze/.local/lib/python3.6/site-packages/mujoco_py/mjrendercontext.pyx\u001b[0m in \u001b[0;36mmujoco_py.cymj.MjRenderContext._setup_opengl_context\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m/home/yunze/.local/lib/python3.6/site-packages/mujoco_py/opengl_context.pyx\u001b[0m in \u001b[0;36mmujoco_py.cymj.GlfwContext.__init__\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m/home/yunze/.local/lib/python3.6/site-packages/mujoco_py/opengl_context.pyx\u001b[0m in \u001b[0;36mmujoco_py.cymj.GlfwContext._init_glfw\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;31mGlfwError\u001b[0m: Failed to initialize GLFW"
     ]
    }
   ],
   "source": [
    "def train(args, extra_args):\n",
    "    env_type, env_id = get_env_type(args)\n",
    "    print('env_type: {}'.format(env_type))\n",
    "\n",
    "    total_timesteps = int(args.num_timesteps)\n",
    "    seed = args.seed\n",
    "\n",
    "    learn = get_learn_function(args.alg)\n",
    "    alg_kwargs = get_learn_function_defaults(args.alg, env_type)\n",
    "    alg_kwargs.update(extra_args)\n",
    "\n",
    "    env = build_env(args)\n",
    "    if args.save_video_interval != 0:\n",
    "        env = VecVideoRecorder(env, osp.join(logger.get_dir(), \"videos\"), record_video_trigger=lambda x: x % args.save_video_interval == 0, video_length=args.save_video_length)\n",
    "\n",
    "    if args.network:\n",
    "        alg_kwargs['network'] = args.network\n",
    "    else:\n",
    "        if alg_kwargs.get('network') is None:\n",
    "            alg_kwargs['network'] = get_default_network(env_type)\n",
    "\n",
    "    print('Training {} on {}:{} with arguments \\n{}'.format(args.alg, env_type, env_id, alg_kwargs))\n",
    "\n",
    "    model = learn(\n",
    "        env=env,\n",
    "        seed=seed,\n",
    "        total_timesteps=total_timesteps,\n",
    "        **alg_kwargs\n",
    "    )\n",
    "\n",
    "    return model, env\n",
    "\n",
    "\n",
    "def build_env(args):\n",
    "    ncpu = multiprocessing.cpu_count()\n",
    "    if sys.platform == 'darwin': ncpu //= 2\n",
    "    nenv = args.num_env or ncpu\n",
    "    alg = args.alg\n",
    "    seed = args.seed\n",
    "\n",
    "    env_type, env_id = get_env_type(args)\n",
    "\n",
    "    if env_type in {'atari', 'retro'}:\n",
    "        if alg == 'deepq':\n",
    "            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})\n",
    "        elif alg == 'trpo_mpi':\n",
    "            env = make_env(env_id, env_type, seed=seed)\n",
    "        else:\n",
    "            frame_stack_size = 4\n",
    "            env = make_vec_env(env_id, env_type, nenv, seed, gamestate=args.gamestate, reward_scale=args.reward_scale)\n",
    "            env = VecFrameStack(env, frame_stack_size)\n",
    "\n",
    "    else:\n",
    "        config = tf.ConfigProto(allow_soft_placement=True,\n",
    "                               intra_op_parallelism_threads=1,\n",
    "                               inter_op_parallelism_threads=1)\n",
    "        config.gpu_options.allow_growth = True\n",
    "        get_session(config=config)\n",
    "\n",
    "        flatten_dict_observations = alg not in {'her'}\n",
    "        env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale, flatten_dict_observations=flatten_dict_observations)\n",
    "\n",
    "        if env_type == 'mujoco':\n",
    "            env = VecNormalize(env, use_tf=True)\n",
    "\n",
    "    return env\n",
    "\n",
    "\n",
    "def get_env_type(args):\n",
    "    env_id = args.env\n",
    "\n",
    "    if args.env_type is not None:\n",
    "        return args.env_type, env_id\n",
    "\n",
    "    # Re-parse the gym registry, since we could have new envs since last time.\n",
    "    for env in gym.envs.registry.all():\n",
    "        env_type = env.entry_point.split(':')[0].split('.')[-1]\n",
    "        _game_envs[env_type].add(env.id)  # This is a set so add is idempotent\n",
    "\n",
    "    if env_id in _game_envs.keys():\n",
    "        env_type = env_id\n",
    "        env_id = [g for g in _game_envs[env_type]][0]\n",
    "    else:\n",
    "        env_type = None\n",
    "        for g, e in _game_envs.items():\n",
    "            if env_id in e:\n",
    "                env_type = g\n",
    "                break\n",
    "        if ':' in env_id:\n",
    "            env_type = re.sub(r':.*', '', env_id)\n",
    "        assert env_type is not None, 'env_id {} is not recognized in env types'.format(env_id, _game_envs.keys())\n",
    "\n",
    "    return env_type, env_id\n",
    "\n",
    "\n",
    "def get_default_network(env_type):\n",
    "    if env_type in {'atari', 'retro'}:\n",
    "        return 'cnn'\n",
    "    else:\n",
    "        return 'mlp'\n",
    "\n",
    "def get_alg_module(alg, submodule=None):\n",
    "    submodule = submodule or alg\n",
    "    try:\n",
    "        # first try to import the alg module from baselines\n",
    "        alg_module = import_module('.'.join(['baselines', alg, submodule]))\n",
    "    except ImportError:\n",
    "        # then from rl_algs\n",
    "        alg_module = import_module('.'.join(['rl_' + 'algs', alg, submodule]))\n",
    "\n",
    "    return alg_module\n",
    "\n",
    "\n",
    "def get_learn_function(alg):\n",
    "    return get_alg_module(alg).learn\n",
    "\n",
    "\n",
    "def get_learn_function_defaults(alg, env_type):\n",
    "    try:\n",
    "        alg_defaults = get_alg_module(alg, 'defaults')\n",
    "        kwargs = getattr(alg_defaults, env_type)()\n",
    "    except (ImportError, AttributeError):\n",
    "        kwargs = {}\n",
    "    return kwargs\n",
    "\n",
    "\n",
    "\n",
    "def parse_cmdline_kwargs(args):\n",
    "    '''\n",
    "    convert a list of '='-spaced command-line arguments to a dictionary, evaluating python objects when possible\n",
    "    '''\n",
    "    def parse(v):\n",
    "\n",
    "        assert isinstance(v, str)\n",
    "        try:\n",
    "            return eval(v)\n",
    "        except (NameError, SyntaxError):\n",
    "            return v\n",
    "\n",
    "    return {k: parse(v) for k,v in parse_unknown_args(args).items()}\n",
    "\n",
    "\n",
    "def configure_logger(log_path, **kwargs):\n",
    "    if log_path is not None:\n",
    "        logger.configure(log_path)\n",
    "    else:\n",
    "        logger.configure(**kwargs)\n",
    "\n",
    "\n",
    "args = shlex.split('python -m baselines.run --alg=ppo2 --env=Hopper-v2 --num_timesteps=0 --load_path=/home/yunze/project/aril/openai_baselines/models/hopper_20M_ppo2_norm --play')\n",
    "\n",
    "        \n",
    "arg_parser = common_arg_parser()\n",
    "args, unknown_args = arg_parser.parse_known_args(args)\n",
    "extra_args = parse_cmdline_kwargs(unknown_args)\n",
    "\n",
    "if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:\n",
    "    rank = 0\n",
    "    configure_logger(args.log_path)\n",
    "else:\n",
    "    rank = MPI.COMM_WORLD.Get_rank()\n",
    "    configure_logger(args.log_path, format_strs=[])\n",
    "\n",
    "model, env = train(args, extra_args)\n",
    "\n",
    "if args.save_path is not None and rank == 0:\n",
    "    save_path = osp.expanduser(args.save_path)\n",
    "    model.save(save_path)\n",
    "\n",
    "if args.play:\n",
    "    logger.log(\"Running trained model\")\n",
    "    obs = env.reset()\n",
    "\n",
    "    state = model.initial_state if hasattr(model, 'initial_state') else None\n",
    "    dones = np.zeros((1,))\n",
    "\n",
    "    episode_rew = np.zeros(env.num_envs) if isinstance(env, VecEnv) else np.zeros(1)\n",
    "    while True:\n",
    "        if state is not None:\n",
    "            actions, _, state, _ = model.step(obs,S=state, M=dones)\n",
    "        else:\n",
    "            actions, _, _, _ = model.step(obs)\n",
    "\n",
    "        obs, rew, done, _ = env.step(actions)\n",
    "        episode_rew += rew\n",
    "        env.render()\n",
    "        done_any = done.any() if isinstance(done, np.ndarray) else done\n",
    "        if done_any:\n",
    "            for i in np.nonzero(done)[0]:\n",
    "                print('episode_rew={}'.format(episode_rew[i]))\n",
    "                episode_rew[i] = 0\n",
    "\n",
    "env.close()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
