{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "67576132",
   "metadata": {},
   "outputs": [],
   "source": [
    "from math import inf\n",
    "import numpy as np\n",
    "import scipy\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import torch.optim as optim\n",
    "import gymnasium as gym\n",
    "import pandas as pd\n",
    "from datetime import datetime\n",
    "from sklearn.base import BaseEstimator\n",
    "from realkd.boosting import WeightUpdateMethod\n",
    "from pandas import qcut\n",
    "from realkd.rules import SquaredLoss, AdditiveRuleEnsemble, Rule\n",
    "from realkd.search import Context\n",
    "\n",
    "import warnings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8b8f01e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "db545637",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cuda\n"
     ]
    }
   ],
   "source": [
    "env = gym.make('Acrobot-v1', render_mode='rgb_array')\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "print(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7aa6c889",
   "metadata": {},
   "outputs": [],
   "source": [
    "column_names = ['cos1', 'sin1', 'cos2', 'sin2', 'w1', 'w2']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "24f75485",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(463647)\n",
    "torch.manual_seed(350907)\n",
    "num_rules=12\n",
    "action_space=3\n",
    "reg=100.0\n",
    "nn_actor_train_iterations=1000\n",
    "rule_actor_train_iterations=1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "84732b72",
   "metadata": {},
   "outputs": [],
   "source": [
    "def orthonormalization(Q):\n",
    "    n, k = Q.shape\n",
    "    O = np.zeros(shape=(n, k))\n",
    "    q = Q[:, 0]\n",
    "    O[:, 0] = q / (norm(q) + 1e-6)\n",
    "    for i in range(1, k):\n",
    "        O_i = O[:, :i]\n",
    "        q = Q[:, i]\n",
    "        q_orth = q - O_i.dot(O_i.T.dot(q))\n",
    "        O[:, i] = q_orth / (norm(q_orth) + 1e-6)\n",
    "    return O"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "973f1f9b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def softmax1(action, values, other_values, location):\n",
    "    all_values = np.insert(other_values, location, values, axis=1)\n",
    "    exps = np.exp(all_values)\n",
    "    res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
    "    return res\n",
    "\n",
    "def norm(x):\n",
    "    \"\"\"\n",
    "    Calculate the L-2 norm of a vector\n",
    "    :param x: the vector whose L-2 norm is to be calculated\n",
    "    :return: the L-2 norm of the vector\n",
    "    \"\"\"\n",
    "    return (x * x).sum() ** 0.5\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "5b0af6ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "class PiLoss:\n",
    "    _instance = None\n",
    "\n",
    "    def __new__(cls):\n",
    "        if cls._instance is None:\n",
    "            cls._instance = super(PiLoss, cls).__new__(cls)\n",
    "        return cls._instance\n",
    "\n",
    "    @staticmethod\n",
    "    def __call__(action, values, advantage, other_values, current):\n",
    "        \"\"\"\n",
    "        :param action: the action chosen\n",
    "        :param values: the values provided by current model\n",
    "        :param other_values: the output by other models\n",
    "        :param current: the action represented by the current model\n",
    "        \"\"\"\n",
    "        sm = softmax1(action, values, other_values, current)+1e-6\n",
    "        return -np.log(sm) * advantage\n",
    "\n",
    "    @staticmethod\n",
    "    def g(action, values, advantage, other_values, current):\n",
    "        return np.where(action == current, -advantage * (1 - softmax1(current, values, other_values, current)),\n",
    "                        advantage * softmax1(current, values, other_values, current))\n",
    "\n",
    "    @staticmethod\n",
    "    def h(action, values, advantage, other_values, current):\n",
    "        sm = softmax1(current, values, other_values, current)\n",
    "        return advantage * sm * (1 - sm)\n",
    "\n",
    "    @staticmethod\n",
    "    def __repr__():\n",
    "        return 'pi_loss'\n",
    "\n",
    "    @staticmethod\n",
    "    def __str__():\n",
    "        return 'pi_loss'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "67942365",
   "metadata": {},
   "outputs": [],
   "source": [
    "loss_functions = {\n",
    "    'pi_loss': PiLoss()\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "693287f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "def loss_function(loss):\n",
    "    \"\"\"Provides loss functions from string representation.\n",
    "\n",
    "    :param loss: string identifier of loss function loss function\n",
    "    :return: loss function matching corresponding to input string (or unchanged input if was already loss function)\n",
    "    \"\"\"\n",
    "    if callable(loss):\n",
    "        return loss\n",
    "    else:\n",
    "        return loss_functions[loss]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "ae0af97c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_risk(loss, action, rules, states, reg, advantage, others, current):\n",
    "    weights = np.array([rule.y for rule in rules])\n",
    "    risk = sum(loss(action, rules(states), advantage, others, current)) + reg * sum(weights * weights) / 2\n",
    "    return risk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "d06dc96b",
   "metadata": {},
   "outputs": [],
   "source": [
    "class FullyCorrectiveA2C:\n",
    "    def __init__(self, loss='pi_loss', reg=1.0, solver='L-BFGS-B'):\n",
    "        self.loss = loss_function(loss)\n",
    "        self.reg = reg\n",
    "        self.solver = solver\n",
    "\n",
    "    @staticmethod\n",
    "    def get_risk(loss, action, q_mat, reg, advantage, others, current):\n",
    "        def sum_loss(weights):\n",
    "            return sum(loss(action, q_mat.dot(weights), advantage, others, current)) + reg * sum(weights * weights) / 2\n",
    "\n",
    "        return sum_loss\n",
    "\n",
    "    @staticmethod\n",
    "    def get_gradient(g, action, q_mat, reg, advantage, other, current):\n",
    "        def gradient(weights):\n",
    "            grad_vec = g(action, q_mat.dot(weights), advantage, other, current)\n",
    "            return q_mat.T.dot(grad_vec) + reg * weights\n",
    "\n",
    "        return gradient\n",
    "\n",
    "    @staticmethod\n",
    "    def get_hessian(h, action, q_mat, reg, advantage, other, current):\n",
    "        def hessian(weights):\n",
    "            h_vec = h(action, q_mat.dot(weights), advantage, other, current)\n",
    "            return q_mat.T.dot(np.diag(h_vec)).dot(q_mat) + np.diag([reg] * len(weights))\n",
    "\n",
    "        return hessian\n",
    "\n",
    "    def calc_weight(self, data, action, rules, advantage, other, current):\n",
    "        g = self.loss.g\n",
    "        h = self.loss.h\n",
    "        loss = self.loss\n",
    "        y = np.array(action)\n",
    "        q_mat = np.column_stack(\n",
    "            [rules[i].q(data) + np.zeros(len(data)) for i in range(len(rules))])\n",
    "        sum_loss = self.get_risk(loss, y, q_mat, self.reg, advantage, other, current)\n",
    "        gradient = self.get_gradient(g, y, q_mat, self.reg, advantage, other, current)\n",
    "        hessian = self.get_hessian(h, y, q_mat, self.reg, advantage, other, current)\n",
    "        if self.solver == 'GD':  # Gradient descent\n",
    "            w = np.array([r.y for r in rules])\n",
    "            old_w = np.ones_like(w) * (1.0 if len(w) - sum(w) > 1e-5 else 2.0)\n",
    "            i = 0\n",
    "            while norm(old_w - w) > 1e-3 and i < 50:\n",
    "                old_w = np.array(w)\n",
    "                if norm(gradient(w)) == 0:\n",
    "                    break\n",
    "                p = -gradient(w) / norm(gradient(w))\n",
    "                w += GoldenRatioSearch(sum_loss, old_w, p, gradient).run() * p\n",
    "                i += 1\n",
    "        elif self.solver == 'Line':\n",
    "            w = np.array([r.y for r in rules])\n",
    "            if norm(gradient(w)) != 0:\n",
    "                p = -gradient(w) / norm(gradient(w))\n",
    "                distance = GoldenRatioSearch(sum_loss, w, p, gradient).run()\n",
    "                w += distance * p\n",
    "        else:\n",
    "            w = np.array([r.y for r in rules])\n",
    "            w = scipy.optimize.minimize(sum_loss, w, method=self.solver, jac=gradient,  # hess=hessian,\n",
    "                                        options={'disp': False}).x\n",
    "\n",
    "        return w"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "2d0c04a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "class ObjectFunction:\n",
    "    def __init__(self, data, target, predictions, loss, reg, advantage, other_values, current, rules=None):\n",
    "        self.loss = loss_function(loss)\n",
    "        self.reg = reg\n",
    "        predictions = np.zeros_like(\n",
    "            target) if predictions is None else predictions\n",
    "        g = np.array(self.loss.g(target, predictions, advantage, other_values, current))\n",
    "        h = np.array(self.loss.h(target, predictions, advantage, other_values, current)) + 1e-6\n",
    "        r = g / h\n",
    "        order = np.argsort(r)[::-1]\n",
    "        self.g = g[order]\n",
    "        self.h = h[order]\n",
    "        self.data = data.iloc[order].reset_index(drop=True)\n",
    "        self.target = target.iloc[order].reset_index(drop=True)\n",
    "        self.n = len(target)\n",
    "\n",
    "    def __call__(self, ext):\n",
    "        raise NotImplementedError()\n",
    "\n",
    "    def bound(self, ext):\n",
    "        raise NotImplementedError()\n",
    "\n",
    "    def search(self, method='greedy', verbose=False, **search_params):\n",
    "        from realkd.search import search_methods\n",
    "        ctx = Context.from_df(self.data, **search_params)\n",
    "        if verbose >= 2:\n",
    "            print(\n",
    "                f'Created search context with {len(ctx.attributes)} attributes')\n",
    "        return search_methods[method](ctx, self, self.bound, verbose=verbose, **search_params).run()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "eaec4413",
   "metadata": {},
   "outputs": [],
   "source": [
    "class OrthogonalBoostingObjective(ObjectFunction):\n",
    "    def __init__(self, data, target, advantage, other_values, current, predictions=None, loss=SquaredLoss, reg=1.0,\n",
    "                 rules=None,\n",
    "                 epsilon=1e-4, **kwargs):\n",
    "        super().__init__(data, target, predictions, loss, reg, advantage, other_values, current, rules)\n",
    "        self.rules = [] if rules is None else rules\n",
    "        self.loss = loss_function(loss)\n",
    "        self.reg = reg\n",
    "        self.epsilon = epsilon\n",
    "        predictions = np.zeros_like(\n",
    "            target) if predictions is None else predictions\n",
    "        g = np.array(self.loss.g(target, predictions, advantage, other_values, current))\n",
    "        self.n = len(target)\n",
    "        r = g\n",
    "        order = np.argsort(r)[::-1]\n",
    "        self.g = g[order]\n",
    "        self.data = data.iloc[order].reset_index(drop=True)\n",
    "        self.target = target.iloc[order].reset_index(drop=True)\n",
    "        if len(rules) != 0:\n",
    "            orth_basis = kwargs['orth_basis']\n",
    "            self.orth_basis = orth_basis[order]\n",
    "            self.g = self.g - self.orth_basis @ self.orth_basis.T @ self.g\n",
    "        else:\n",
    "            self.orth_basis = np.zeros(self.n)\n",
    "\n",
    "    def __call__(self, ext):\n",
    "        if len(ext) == 0:\n",
    "            return -inf\n",
    "        g_q = self.g[ext]\n",
    "        if len(self.rules) == 0:\n",
    "            h_q = self.h[ext]\n",
    "            return abs(g_q.sum()) / np.sqrt(h_q.sum())\n",
    "        length = self.fast_orth_norm(ext)\n",
    "        if length > 1e-4:\n",
    "            obj = abs(g_q.sum()) / (length + self.epsilon)\n",
    "        else:\n",
    "            obj = 0\n",
    "        return obj\n",
    "\n",
    "    def fast_orth_norm(self, ext):\n",
    "        deltas = self.orth_basis[ext]\n",
    "        length = len(ext)\n",
    "        okqi = abs(np.sum(deltas, axis=0))\n",
    "        q_para_norms = (okqi ** 2).sum()\n",
    "        q_orth_norms_sq = np.abs(length - q_para_norms)\n",
    "        return np.sqrt(q_orth_norms_sq)\n",
    "\n",
    "    def fast_para_norms_prefix(self, ext):\n",
    "        deltas = self.orth_basis[ext]\n",
    "        length = len(ext)\n",
    "        okqi = np.cumsum(deltas, axis=0)\n",
    "        q_para_norms = (okqi ** 2).sum(axis=1)\n",
    "        q_orth_norms_sq = np.abs(np.arange(1, length + 1) - q_para_norms)\n",
    "        q_orth_norms = np.sqrt(q_orth_norms_sq)\n",
    "        return q_orth_norms\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "626cceb7",
   "metadata": {},
   "outputs": [],
   "source": [
    "class GeneralRuleBoostingEstimator(BaseEstimator):\n",
    "    def __init__(self, num_rules, objective_function, weight_update_method, loss='squared', reg=1.0,\n",
    "                 search='greedy', max_col_attr=10,\n",
    "                 search_params=None, verbose=False):\n",
    "        if search_params is None:\n",
    "            search_params = {'order': 'bestboundfirst', 'apx': 1.0, 'max_depth': None, 'discretization': qcut,\n",
    "                             'max_col_attr': max_col_attr}\n",
    "        self.num_rules = num_rules\n",
    "        self.num_components = 500\n",
    "        self.objective = objective_function\n",
    "        self.objective_function = objective_function\n",
    "        self.max_col_attr = max_col_attr\n",
    "        self.weight_update_method = weight_update_method\n",
    "        self.loss = loss_function(loss)\n",
    "        self.reg = reg\n",
    "        self.weight_update_method.loss = loss\n",
    "        self.weight_update_method.reg = reg\n",
    "        self.verbose = verbose\n",
    "        self.search = search\n",
    "        self.rules_ = AdditiveRuleEnsemble([])\n",
    "        self.search_params = search_params\n",
    "        self.history = []\n",
    "        self.time = []\n",
    "\n",
    "    def set_reg(self, reg):\n",
    "        self.reg = reg\n",
    "        self.objective.reg = reg\n",
    "        self.weight_update_method.reg = reg\n",
    "\n",
    "    def fit(self, data, target, advantage, other_values, current, has_origin_rules=False, verbose=False):\n",
    "        if not has_origin_rules:\n",
    "            self.history = []\n",
    "            self.time = []\n",
    "            self.rules_.members = []\n",
    "            orth_basis = np.array([])\n",
    "        else:\n",
    "            q_mat = np.column_stack(\n",
    "                [self.rules_[i].q(data) + np.zeros(len(data)) for i in range(len(self.rules_))])\n",
    "            orth_basis = orthonormalization(q_mat)\n",
    "        num_components = 0\n",
    "        while len(self.rules_) < self.num_rules and num_components < self.num_components:\n",
    "            start_time = datetime.now()\n",
    "            # Search for a rule\n",
    "            scores = self.rules_(data)\n",
    "            obj = self.objective(data, target, advantage, other_values, current, predictions=scores,\n",
    "                                 loss=self.loss, reg=self.reg, rules=self.rules_, orth_basis=orth_basis)\n",
    "            q = obj.search(method=self.search, verbose=verbose,\n",
    "                           **self.search_params)\n",
    "            if hasattr(self.objective, 'opt_weight') and callable(getattr(self.objective, 'opt_weight')):\n",
    "                y = obj.opt_weight(q)\n",
    "            else:\n",
    "                y = 1.0  # np.random.random()\n",
    "            q_vec = q(data)\n",
    "            num_components += (1 + len(q))\n",
    "            if len(orth_basis) == 0:\n",
    "                basis = q_vec / norm(q_vec)\n",
    "                orth_basis = np.array([basis]).T\n",
    "            else:\n",
    "                basis = q_vec - orth_basis.dot(orth_basis.T.dot(q_vec))\n",
    "                basis = basis / (norm(basis) + 1e-6)\n",
    "                orth_basis = np.hstack((orth_basis, np.array([basis]).T))\n",
    "            rule = Rule(q, y)\n",
    "            if self.verbose:\n",
    "                print(rule)\n",
    "            self.rules_.append(rule)\n",
    "            # Calculate weights\n",
    "            weights = self.weight_update_method.calc_weight(\n",
    "                data, target, self.rules_, advantage, other_values, current)\n",
    "            for i in range(len(self.rules_)):\n",
    "                self.rules_[i].y = weights[i]\n",
    "            self.history.append(AdditiveRuleEnsemble(\n",
    "                [Rule(q=rule.q, y=rule.y) for rule in self.rules_.members]))\n",
    "            end_time = datetime.now()\n",
    "            self.time.append(str(end_time - start_time))\n",
    "        return self\n",
    "\n",
    "    def predict(self, data):\n",
    "        loss = loss_function(self.loss)\n",
    "        return loss.preidictions(self.rules_(data))\n",
    "\n",
    "    def decision_function(self, data):\n",
    "        return self.rules_(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "b50ce787",
   "metadata": {},
   "outputs": [],
   "source": [
    "# class Actor(nn.Module):\n",
    "#     def __init__(self, action_space, node=16):\n",
    "#         super(Actor, self).__init__()\n",
    "#         self.fc1 = nn.Linear(4, node)  \n",
    "# #         self.fc1_ = nn.Linear(node, node)\n",
    "#         self.fc2 = nn.Linear(node, action_space)\n",
    "\n",
    "#     def forward(self, state):\n",
    "#         x = F.relu(self.fc1(state))\n",
    "# #         x = F.relu(self.fc1_(x))\n",
    "#         x = self.fc2(x)\n",
    "#         return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "84b0ee0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def copy_rules(origin_rules):\n",
    "    rules = []\n",
    "    for n in range(len(origin_rules)):\n",
    "        rule = origin_rules[n]\n",
    "        rules.append(Rule(q=rule.q, y=rule.y))\n",
    "    return AdditiveRuleEnsemble(rules)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "bb3fcc8c",
   "metadata": {},
   "outputs": [],
   "source": [
    "class ActorRule():\n",
    "    def __init__(self, action_space=action_space, num_rules=num_rules):\n",
    "        self.action_space = action_space\n",
    "        self.model = [GeneralRuleBoostingEstimator(num_rules=num_rules, objective_function=OrthogonalBoostingObjective,\n",
    "                                                   weight_update_method=FullyCorrectiveA2C(), loss=PiLoss(),\n",
    "                                                   reg=reg, search='greedy', max_col_attr=10, verbose=False) for _ in\n",
    "                      range(action_space)]\n",
    "        self.best_model = [\n",
    "            GeneralRuleBoostingEstimator(num_rules=num_rules, objective_function=OrthogonalBoostingObjective,\n",
    "                                         weight_update_method=FullyCorrectiveA2C(), loss=PiLoss(),\n",
    "                                         reg=0.01, search='greedy', max_col_attr=20, verbose=False) for _ in\n",
    "            range(action_space)]\n",
    "\n",
    "    def __call__(self, state):\n",
    "        res = [self.model[i].rules_(state) for i in range(len(self.model))]\n",
    "        return np.array(res)\n",
    "\n",
    "    def predict(self, state):\n",
    "        res = [self.best_model[i].rules_(state) for i in range(len(self.model))]\n",
    "        return np.array(res)\n",
    "\n",
    "\n",
    "def update_rules(estimator: GeneralRuleBoostingEstimator, x, y, adv, other_values, current):\n",
    "    queries_lst = []\n",
    "    queries = {}\n",
    "    original_rules = copy_rules(estimator.rules_)\n",
    "    origin_risk = calc_risk(PiLoss(), y, original_rules, x, estimator.reg, adv, other_values, current)\n",
    "    for i in range(len(estimator.rules_)):\n",
    "        q_str = str(estimator.rules_[i].q)\n",
    "        if q_str not in queries:\n",
    "            queries[q_str] = estimator.rules_[i].y\n",
    "            queries_lst.append(estimator.rules_[i].q)\n",
    "        else:\n",
    "            queries[q_str] += estimator.rules_[i].y\n",
    "    for k in queries:\n",
    "        queries[k] = abs(queries[k])\n",
    "    min_weight_query = min(queries, key=queries.get)\n",
    "    rules = []\n",
    "    for i in range(len(queries_lst)):\n",
    "        q_str = str(queries_lst[i])\n",
    "        if q_str != min_weight_query:\n",
    "            rules.append(Rule(q=queries_lst[i], y=queries[str(queries_lst[i])]))\n",
    "    if len(rules) == 0:\n",
    "        rules = [Rule(q=queries_lst[0], y=queries[str(queries_lst[0])])]\n",
    "    new_rules = AdditiveRuleEnsemble(rules)\n",
    "    weights = FullyCorrectiveA2C(loss_function('pi_loss'), estimator.reg).calc_weight(x, y, new_rules, adv,\n",
    "                                                                                      other_values, current)\n",
    "    for i in range(len(new_rules)):\n",
    "        new_rules[i].y = weights[i]\n",
    "    estimator.rules_ = new_rules\n",
    "    estimator.fit(x, y, adv, other_values, current, has_origin_rules=True)\n",
    "    weights = FullyCorrectiveA2C(loss_function('pi_loss'), estimator.reg).calc_weight(x, y, estimator.rules_, adv,\n",
    "                                                                                      other_values, current)\n",
    "    for i in range(len(estimator.rules_)):\n",
    "        estimator.rules_[i].y = weights[i]\n",
    "    new_risk = calc_risk(PiLoss(), y, estimator.rules_, x, estimator.reg, adv, other_values, current)\n",
    "    if new_risk > origin_risk:\n",
    "        estimator.rules_ = copy_rules(original_rules)\n",
    "        print(\"origin\", origin_risk, 'new', new_risk, 'not updated')\n",
    "        # risk = sum(PiLoss.__call__(y, estimator.rules_[i].y))\n",
    "    return estimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "2824981f",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Critic(nn.Module):\n",
    "    def __init__(self, node=16):\n",
    "        super(Critic, self).__init__()\n",
    "        self.fc1 = nn.Linear(len(column_names), node)\n",
    "        self.fc2 = nn.Linear(node, 1)\n",
    "\n",
    "    def forward(self, state):\n",
    "        x = F.relu(self.fc1(state))\n",
    "        x = self.fc2(x)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "6b6e2dd7",
   "metadata": {},
   "outputs": [],
   "source": [
    "gamma = 0.99\n",
    "actor = ActorRule()\n",
    "critic = Critic().to(device)\n",
    "critic_optimizer = optim.AdamW(critic.parameters(), lr=0.001)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ae072cc5",
   "metadata": {},
   "outputs": [],
   "source": [
    "class ActorNet(nn.Module):\n",
    "    def __init__(self, hidden_dim=16):\n",
    "        super().__init__()\n",
    "\n",
    "        self.hidden = nn.Linear(len(column_names), hidden_dim)\n",
    "        self.output = nn.Linear(hidden_dim, action_space)\n",
    "\n",
    "    def forward(self, s):\n",
    "        outs = self.hidden(s)\n",
    "        outs = F.relu(outs)\n",
    "        logits = self.output(outs)\n",
    "        return logits\n",
    "\n",
    "\n",
    "actor_func = ActorNet().to(device)\n",
    "value_func = critic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "c565d5ef",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Run episode 15 with rewards -293.0\r"
     ]
    }
   ],
   "source": [
    "opt1 = torch.optim.AdamW(value_func.parameters(), lr=0.001)\n",
    "opt2 = torch.optim.AdamW(actor_func.parameters(), lr=0.001)\n",
    "def pick_sample(s):\n",
    "    with torch.no_grad():\n",
    "        #   --> size : (1, 4)\n",
    "        s_batch = np.expand_dims(s, axis=0)\n",
    "        s_batch = torch.tensor(s_batch, dtype=torch.float).to(device)\n",
    "        # Get logits from state\n",
    "        #   --> size : (1, 2)\n",
    "        logits = actor_func(s_batch)\n",
    "        #   --> size : (2)\n",
    "        logits = logits.squeeze(dim=0)\n",
    "        # From logits to probabilities\n",
    "        probs = F.softmax(logits, dim=-1)\n",
    "        # Pick up action's sample\n",
    "        a = torch.multinomial(probs, num_samples=1)\n",
    "        # Return\n",
    "        return a.tolist()[0]\n",
    "reward_records = []\n",
    "for i in range(nn_actor_train_iterations):\n",
    "    #\n",
    "    # Run episode till done\n",
    "    #\n",
    "    done = False\n",
    "    states = []\n",
    "    actions = []\n",
    "    rewards = []\n",
    "    s, _ = env.reset(seed=np.random.randint(0, 1e6))\n",
    "    while not done:\n",
    "        states.append(s.tolist())\n",
    "        a = pick_sample(s)\n",
    "        s, r, term, trunc, _ = env.step(a)\n",
    "        done = term or trunc\n",
    "        actions.append(a)\n",
    "        rewards.append(r)\n",
    "\n",
    "    #\n",
    "    # Get cumulative rewards\n",
    "    #\n",
    "    cum_rewards = np.zeros_like(rewards)\n",
    "    reward_len = len(rewards)\n",
    "    for j in reversed(range(reward_len)):\n",
    "        cum_rewards[j] = rewards[j] + (cum_rewards[j+1]*gamma if j+1 < reward_len else 0)\n",
    "\n",
    "    #\n",
    "    # Train (optimize parameters)\n",
    "    #\n",
    "\n",
    "    # Optimize value loss (Critic)\n",
    "    opt1.zero_grad()\n",
    "    states = torch.tensor(states, dtype=torch.float).to(device)\n",
    "    cum_rewards = torch.tensor(cum_rewards, dtype=torch.float).to(device)\n",
    "    values = value_func(states)\n",
    "    values = values.squeeze(dim=1)\n",
    "    vf_loss = F.mse_loss(\n",
    "        values,\n",
    "        cum_rewards,\n",
    "        reduction=\"none\")\n",
    "    vf_loss.sum().backward()\n",
    "    opt1.step()\n",
    "\n",
    "    # Todo 2; RULE BASED UPDATES\n",
    "    with torch.no_grad():\n",
    "        values = value_func(states).squeeze(dim=1)\n",
    "    opt2.zero_grad()\n",
    "    actions = torch.tensor(actions, dtype=torch.int64).to(device)\n",
    "    advantages = cum_rewards - values\n",
    "#     print(advantages)\n",
    "    logits = actor_func(states)\n",
    "    log_probs = -F.cross_entropy(logits, actions, reduction=\"none\")\n",
    "    pi_loss = -log_probs * advantages\n",
    "    pi_loss.sum().backward()\n",
    "    opt2.step()\n",
    "    \n",
    "    # Output total rewards in episode (max 500)\n",
    "    print(\"Run episode {} with rewards {}\".format(i, sum(rewards)), end=\"\\r\")\n",
    "    if sum(rewards)>-300:\n",
    "        break\n",
    "    reward_records.append(sum(rewards))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "73589768",
   "metadata": {},
   "outputs": [],
   "source": [
    "def softmax(logits):\n",
    "    exp_logits = np.exp(logits)\n",
    "    sum_logits = np.sum(exp_logits)\n",
    "    return (exp_logits / sum_logits).reshape([-1])\n",
    "\n",
    "def pick_sample(s):\n",
    "    with torch.no_grad():\n",
    "        s_batch = np.expand_dims(s, axis=0)\n",
    "        logits = actor(pd.DataFrame(s_batch, columns=column_names))\n",
    "        probs = softmax(logits)\n",
    "        a = torch.multinomial(torch.tensor(probs), num_samples=1)\n",
    "        return a.tolist()[0]\n",
    "\n",
    "\n",
    "def train():\n",
    "    reward_records = []\n",
    "    best_rewards = -10000\n",
    "    # best_model = None\n",
    "    for i in range(rule_actor_train_iterations):\n",
    "\n",
    "        done = False\n",
    "        states = []\n",
    "        actions = []\n",
    "        rewards = []\n",
    "        s, _ = env.reset(seed=np.random.randint(0, 1e6))\n",
    "        if i == 2:\n",
    "            print('stop here')\n",
    "        while not done:\n",
    "            states.append(s.tolist())\n",
    "            a = pick_sample(s)\n",
    "            s, r, term, trunc, _ = env.step(a)\n",
    "            done = term or trunc\n",
    "            actions.append(a)\n",
    "            rewards.append(r)\n",
    "\n",
    "        # if sum(rewards) > 480:\n",
    "        #     break\n",
    "        #\n",
    "        # Get cumulative rewards\n",
    "        #\n",
    "        cum_rewards = np.zeros_like(rewards)\n",
    "        reward_len = len(rewards)\n",
    "        for j in reversed(range(reward_len)):\n",
    "            cum_rewards[j] = rewards[j] + (cum_rewards[j + 1] * gamma if j + 1 < reward_len else 0)\n",
    "        #\n",
    "        # Train (optimize parameters)\n",
    "        #\n",
    "        # Optimize value loss (Critic)\n",
    "        critic_optimizer.zero_grad()\n",
    "        states = torch.tensor(states, dtype=torch.float).to(device)\n",
    "        cum_rewards = torch.tensor(cum_rewards, dtype=torch.float).to(device)\n",
    "        values = critic(states)\n",
    "        values = values.squeeze(dim=1)\n",
    "        vf_loss = F.mse_loss(\n",
    "            values,\n",
    "            cum_rewards,\n",
    "            reduction=\"none\")\n",
    "        vf_loss.sum().backward()\n",
    "        critic_optimizer.step()\n",
    "        print('============', i, '===========')\n",
    "#         print(len(actions), 'actions', actions)\n",
    "        reward_records.append(sum(rewards))\n",
    "        print(\"Run episode {} with rewards {}\".format(i,\n",
    "                                                      sum(rewards)))  # , end=\"\\r\")\n",
    "        if i > 0:\n",
    "            if sum(rewards) >= best_rewards:\n",
    "                best_rewards = sum(rewards)\n",
    "                for rr in range(actor.action_space):\n",
    "                    rules = []\n",
    "                    for n in range(len(actor.model[rr].rules_)):\n",
    "                        rule = actor.model[rr].rules_[n]\n",
    "                        rules.append(Rule(q=rule.q, y=rule.y))\n",
    "                    actor.best_model[rr].rules_ = AdditiveRuleEnsemble(rules)\n",
    "                print('best')\n",
    "        if np.average(reward_records[-5:]) > -150.0:\n",
    "            break\n",
    "        #     else:\n",
    "        #         for rr in range(actor.action_space):\n",
    "        #             rules = []\n",
    "        #             for n in range(len(actor.best_model[rr].rules_)):\n",
    "        #                 rule = actor.best_model[rr].rules_[n]\n",
    "        #                 rules.append(Rule(q=rule.q, y=rule.y))\n",
    "        #             actor.model[rr].rules_ = AdditiveRuleEnsemble(rules)\n",
    "        #         print('')\n",
    "\n",
    "        #\n",
    "        # Todo 2; RULE BASED UPDATES\n",
    "        with torch.no_grad():\n",
    "            values = critic(states).squeeze(dim=1)\n",
    "        actions = torch.tensor(actions, dtype=torch.int64).to(device)\n",
    "        advantages = cum_rewards - values\n",
    "        states_array = states.cpu().detach().numpy()\n",
    "        x = pd.DataFrame(states_array, columns=column_names)\n",
    "        y = pd.Series(actions.cpu().detach().numpy())\n",
    "#         advantages = (advantages-advantages.mean())/advantages.std()\n",
    "        adv = pd.Series(advantages.cpu().detach().numpy())\n",
    "        outputs = actor(x).T\n",
    "        # print('adv', advantages.cpu().detach().numpy())\n",
    "        # print('outputs', outputs.tolist())\n",
    "        for j in range(len(actor.model)):\n",
    "            m = actor.model[j]\n",
    "            masks = np.ones_like(outputs, dtype=bool)\n",
    "            masks[:, j] = False\n",
    "            other_values = outputs[masks].reshape(-1, outputs.shape[1] - 1)\n",
    "\n",
    "            if len(m.rules_) == 0:\n",
    "                m.fit(x, y, adv, other_values, j)\n",
    "            else:\n",
    "                for _ in range(1):\n",
    "                    update_rules(m, x, y, adv, other_values, j)\n",
    "        \n",
    "        # print(\n",
    "        #     \"{}\\n--------------\\n{}\\nRun episode {} with rewards {}\".format(actor.model[0].rules_,\n",
    "        #                                                                     actor.model[1].rules_,\n",
    "        #                                                                     i,\n",
    "        #                                                                     sum(rewards)))  # , end=\"\\r\")\n",
    "        \n",
    "    print(\"\\nDone\")\n",
    "    env.close()\n",
    "    return reward_records"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "b411aa5d",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 0 ===========\n",
      "Run episode 0 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/1846393928.py:32: RuntimeWarning: invalid value encountered in sqrt\n",
      "  return abs(g_q.sum()) / np.sqrt(h_q.sum())\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/1846393928.py:32: RuntimeWarning: invalid value encountered in sqrt\n",
      "  return abs(g_q.sum()) / np.sqrt(h_q.sum())\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/1846393928.py:32: RuntimeWarning: invalid value encountered in sqrt\n",
      "  return abs(g_q.sum()) / np.sqrt(h_q.sum())\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 1 ===========\n",
      "Run episode 1 with rewards -369.0\n",
      "best\n",
      "stop here\n",
      "============ 2 ===========\n",
      "Run episode 2 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 3 ===========\n",
      "Run episode 3 with rewards -500.0\n",
      "============ 4 ===========\n",
      "Run episode 4 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 5 ===========\n",
      "Run episode 5 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 6 ===========\n",
      "Run episode 6 with rewards -500.0\n",
      "============ 7 ===========\n",
      "Run episode 7 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 8 ===========\n",
      "Run episode 8 with rewards -232.0\n",
      "best\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 9 ===========\n",
      "Run episode 9 with rewards -500.0\n",
      "============ 10 ===========\n",
      "Run episode 10 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 11 ===========\n",
      "Run episode 11 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 12 ===========\n",
      "Run episode 12 with rewards -500.0\n",
      "============ 13 ===========\n",
      "Run episode 13 with rewards -222.0\n",
      "best\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 14 ===========\n",
      "Run episode 14 with rewards -500.0\n",
      "============ 15 ===========\n",
      "Run episode 15 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 16 ===========\n",
      "Run episode 16 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 17 ===========\n",
      "Run episode 17 with rewards -500.0\n",
      "============ 18 ===========\n",
      "Run episode 18 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 19 ===========\n",
      "Run episode 19 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 20 ===========\n",
      "Run episode 20 with rewards -500.0\n",
      "============ 21 ===========\n",
      "Run episode 21 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 22 ===========\n",
      "Run episode 22 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 23 ===========\n",
      "Run episode 23 with rewards -500.0\n",
      "============ 24 ===========\n",
      "Run episode 24 with rewards -333.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 25 ===========\n",
      "Run episode 25 with rewards -500.0\n",
      "============ 26 ===========\n",
      "Run episode 26 with rewards -500.0\n",
      "============ 27 ===========\n",
      "Run episode 27 with rewards -500.0\n",
      "============ 28 ===========\n",
      "Run episode 28 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 29 ===========\n",
      "Run episode 29 with rewards -500.0\n",
      "============ 30 ===========\n",
      "Run episode 30 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 31 ===========\n",
      "Run episode 31 with rewards -500.0\n",
      "============ 32 ===========\n",
      "Run episode 32 with rewards -500.0\n",
      "============ 33 ===========\n",
      "Run episode 33 with rewards -500.0\n",
      "============ 34 ===========\n",
      "Run episode 34 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 35 ===========\n",
      "Run episode 35 with rewards -500.0\n",
      "============ 36 ===========\n",
      "Run episode 36 with rewards -500.0\n",
      "============ 37 ===========\n",
      "Run episode 37 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 38 ===========\n",
      "Run episode 38 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 39 ===========\n",
      "Run episode 39 with rewards -500.0\n",
      "============ 40 ===========\n",
      "Run episode 40 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 41 ===========\n",
      "Run episode 41 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 42 ===========\n",
      "Run episode 42 with rewards -500.0\n",
      "============ 43 ===========\n",
      "Run episode 43 with rewards -454.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 44 ===========\n",
      "Run episode 44 with rewards -500.0\n",
      "============ 45 ===========\n",
      "Run episode 45 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 46 ===========\n",
      "Run episode 46 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 47 ===========\n",
      "Run episode 47 with rewards -243.0\n",
      "============ 48 ===========\n",
      "Run episode 48 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 49 ===========\n",
      "Run episode 49 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 50 ===========\n",
      "Run episode 50 with rewards -500.0\n",
      "============ 51 ===========\n",
      "Run episode 51 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 52 ===========\n",
      "Run episode 52 with rewards -500.0\n",
      "============ 53 ===========\n",
      "Run episode 53 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 54 ===========\n",
      "Run episode 54 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 55 ===========\n",
      "Run episode 55 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 56 ===========\n",
      "Run episode 56 with rewards -500.0\n",
      "============ 57 ===========\n",
      "Run episode 57 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 58 ===========\n",
      "Run episode 58 with rewards -500.0\n",
      "============ 59 ===========\n",
      "Run episode 59 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 60 ===========\n",
      "Run episode 60 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 61 ===========\n",
      "Run episode 61 with rewards -500.0\n",
      "============ 62 ===========\n",
      "Run episode 62 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 63 ===========\n",
      "Run episode 63 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 64 ===========\n",
      "Run episode 64 with rewards -500.0\n",
      "============ 65 ===========\n",
      "Run episode 65 with rewards -123.0\n",
      "best\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 66 ===========\n",
      "Run episode 66 with rewards -500.0\n",
      "============ 67 ===========\n",
      "Run episode 67 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 68 ===========\n",
      "Run episode 68 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 69 ===========\n",
      "Run episode 69 with rewards -500.0\n",
      "============ 70 ===========\n",
      "Run episode 70 with rewards -189.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 71 ===========\n",
      "Run episode 71 with rewards -500.0\n",
      "============ 72 ===========\n",
      "Run episode 72 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 73 ===========\n",
      "Run episode 73 with rewards -155.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 74 ===========\n",
      "Run episode 74 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 75 ===========\n",
      "Run episode 75 with rewards -500.0\n",
      "============ 76 ===========\n",
      "Run episode 76 with rewards -500.0\n",
      "============ 77 ===========\n",
      "Run episode 77 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 78 ===========\n",
      "Run episode 78 with rewards -75.0\n",
      "best\n",
      "============ 79 ===========\n",
      "Run episode 79 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 80 ===========\n",
      "Run episode 80 with rewards -77.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 81 ===========\n",
      "Run episode 81 with rewards -500.0\n",
      "============ 82 ===========\n",
      "Run episode 82 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 83 ===========\n",
      "Run episode 83 with rewards -500.0\n",
      "============ 84 ===========\n",
      "Run episode 84 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 85 ===========\n",
      "Run episode 85 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 86 ===========\n",
      "Run episode 86 with rewards -500.0\n",
      "============ 87 ===========\n",
      "Run episode 87 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 88 ===========\n",
      "Run episode 88 with rewards -500.0\n",
      "============ 89 ===========\n",
      "Run episode 89 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 90 ===========\n",
      "Run episode 90 with rewards -500.0\n",
      "============ 91 ===========\n",
      "Run episode 91 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 92 ===========\n",
      "Run episode 92 with rewards -452.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 93 ===========\n",
      "Run episode 93 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 94 ===========\n",
      "Run episode 94 with rewards -500.0\n",
      "============ 95 ===========\n",
      "Run episode 95 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 96 ===========\n",
      "Run episode 96 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 97 ===========\n",
      "Run episode 97 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "origin 0.04002085976086109 new 0.04002085976086161 not updated\n",
      "============ 98 ===========\n",
      "Run episode 98 with rewards -500.0\n",
      "============ 99 ===========\n",
      "Run episode 99 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 100 ===========\n",
      "Run episode 100 with rewards -158.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 101 ===========\n",
      "Run episode 101 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 102 ===========\n",
      "Run episode 102 with rewards -500.0\n",
      "============ 103 ===========\n",
      "Run episode 103 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 104 ===========\n",
      "Run episode 104 with rewards -93.0\n",
      "============ 105 ===========\n",
      "Run episode 105 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 106 ===========\n",
      "Run episode 106 with rewards -500.0\n",
      "============ 107 ===========\n",
      "Run episode 107 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 108 ===========\n",
      "Run episode 108 with rewards -160.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 109 ===========\n",
      "Run episode 109 with rewards -500.0\n",
      "============ 110 ===========\n",
      "Run episode 110 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 111 ===========\n",
      "Run episode 111 with rewards -333.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 112 ===========\n",
      "Run episode 112 with rewards -313.0\n",
      "============ 113 ===========\n",
      "Run episode 113 with rewards -339.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 114 ===========\n",
      "Run episode 114 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 115 ===========\n",
      "Run episode 115 with rewards -500.0\n",
      "============ 116 ===========\n",
      "Run episode 116 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 117 ===========\n",
      "Run episode 117 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 118 ===========\n",
      "Run episode 118 with rewards -500.0\n",
      "============ 119 ===========\n",
      "Run episode 119 with rewards -500.0\n",
      "origin 0.039922393128230094 new 0.03992239313091827 not updated\n",
      "============ 120 ===========\n",
      "Run episode 120 with rewards -500.0\n",
      "============ 121 ===========\n",
      "Run episode 121 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 122 ===========\n",
      "Run episode 122 with rewards -500.0\n",
      "============ 123 ===========\n",
      "Run episode 123 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 124 ===========\n",
      "Run episode 124 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 125 ===========\n",
      "Run episode 125 with rewards -500.0\n",
      "============ 126 ===========\n",
      "Run episode 126 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 127 ===========\n",
      "Run episode 127 with rewards -500.0\n",
      "============ 128 ===========\n",
      "Run episode 128 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 129 ===========\n",
      "Run episode 129 with rewards -310.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 130 ===========\n",
      "Run episode 130 with rewards -500.0\n",
      "============ 131 ===========\n",
      "Run episode 131 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 132 ===========\n",
      "Run episode 132 with rewards -500.0\n",
      "============ 133 ===========\n",
      "Run episode 133 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 134 ===========\n",
      "Run episode 134 with rewards -218.0\n",
      "============ 135 ===========\n",
      "Run episode 135 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 136 ===========\n",
      "Run episode 136 with rewards -143.0\n",
      "============ 137 ===========\n",
      "Run episode 137 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 138 ===========\n",
      "Run episode 138 with rewards -205.0\n",
      "============ 139 ===========\n",
      "Run episode 139 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 140 ===========\n",
      "Run episode 140 with rewards -500.0\n",
      "============ 141 ===========\n",
      "Run episode 141 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 142 ===========\n",
      "Run episode 142 with rewards -485.0\n",
      "============ 143 ===========\n",
      "Run episode 143 with rewards -500.0\n",
      "============ 144 ===========\n",
      "Run episode 144 with rewards -126.0\n",
      "============ 145 ===========\n",
      "Run episode 145 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 146 ===========\n",
      "Run episode 146 with rewards -500.0\n",
      "============ 147 ===========\n",
      "Run episode 147 with rewards -500.0\n",
      "============ 148 ===========\n",
      "Run episode 148 with rewards -500.0\n",
      "============ 149 ===========\n",
      "Run episode 149 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 150 ===========\n",
      "Run episode 150 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 151 ===========\n",
      "Run episode 151 with rewards -500.0\n",
      "============ 152 ===========\n",
      "Run episode 152 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 153 ===========\n",
      "Run episode 153 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 154 ===========\n",
      "Run episode 154 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 155 ===========\n",
      "Run episode 155 with rewards -500.0\n",
      "============ 156 ===========\n",
      "Run episode 156 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 157 ===========\n",
      "Run episode 157 with rewards -395.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 158 ===========\n",
      "Run episode 158 with rewards -300.0\n",
      "============ 159 ===========\n",
      "Run episode 159 with rewards -500.0\n",
      "============ 160 ===========\n",
      "Run episode 160 with rewards -500.0\n",
      "============ 161 ===========\n",
      "Run episode 161 with rewards -183.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 162 ===========\n",
      "Run episode 162 with rewards -500.0\n",
      "============ 163 ===========\n",
      "Run episode 163 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 164 ===========\n",
      "Run episode 164 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 165 ===========\n",
      "Run episode 165 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 166 ===========\n",
      "Run episode 166 with rewards -500.0\n",
      "============ 167 ===========\n",
      "Run episode 167 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 168 ===========\n",
      "Run episode 168 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 169 ===========\n",
      "Run episode 169 with rewards -500.0\n",
      "============ 170 ===========\n",
      "Run episode 170 with rewards -500.0\n",
      "============ 171 ===========\n",
      "Run episode 171 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 172 ===========\n",
      "Run episode 172 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 173 ===========\n",
      "Run episode 173 with rewards -500.0\n",
      "============ 174 ===========\n",
      "Run episode 174 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 175 ===========\n",
      "Run episode 175 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 176 ===========\n",
      "Run episode 176 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 177 ===========\n",
      "Run episode 177 with rewards -208.0\n",
      "============ 178 ===========\n",
      "Run episode 178 with rewards -500.0\n",
      "origin 0.03951095048738078 new 0.039510950488151536 not updated\n",
      "============ 179 ===========\n",
      "Run episode 179 with rewards -500.0\n",
      "============ 180 ===========\n",
      "Run episode 180 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 181 ===========\n",
      "Run episode 181 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 182 ===========\n",
      "Run episode 182 with rewards -500.0\n",
      "============ 183 ===========\n",
      "Run episode 183 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 184 ===========\n",
      "Run episode 184 with rewards -500.0\n",
      "============ 185 ===========\n",
      "Run episode 185 with rewards -389.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 186 ===========\n",
      "Run episode 186 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 187 ===========\n",
      "Run episode 187 with rewards -500.0\n",
      "============ 188 ===========\n",
      "Run episode 188 with rewards -500.0\n",
      "============ 189 ===========\n",
      "Run episode 189 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 190 ===========\n",
      "Run episode 190 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 191 ===========\n",
      "Run episode 191 with rewards -500.0\n",
      "============ 192 ===========\n",
      "Run episode 192 with rewards -500.0\n",
      "============ 193 ===========\n",
      "Run episode 193 with rewards -500.0\n",
      "============ 194 ===========\n",
      "Run episode 194 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 195 ===========\n",
      "Run episode 195 with rewards -500.0\n",
      "============ 196 ===========\n",
      "Run episode 196 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 197 ===========\n",
      "Run episode 197 with rewards -500.0\n",
      "============ 198 ===========\n",
      "Run episode 198 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 199 ===========\n",
      "Run episode 199 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 200 ===========\n",
      "Run episode 200 with rewards -500.0\n",
      "============ 201 ===========\n",
      "Run episode 201 with rewards -500.0\n",
      "============ 202 ===========\n",
      "Run episode 202 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 203 ===========\n",
      "Run episode 203 with rewards -500.0\n",
      "============ 204 ===========\n",
      "Run episode 204 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 205 ===========\n",
      "Run episode 205 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 206 ===========\n",
      "Run episode 206 with rewards -500.0\n",
      "============ 207 ===========\n",
      "Run episode 207 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 208 ===========\n",
      "Run episode 208 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 209 ===========\n",
      "Run episode 209 with rewards -500.0\n",
      "============ 210 ===========\n",
      "Run episode 210 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 211 ===========\n",
      "Run episode 211 with rewards -500.0\n",
      "============ 212 ===========\n",
      "Run episode 212 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 213 ===========\n",
      "Run episode 213 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 214 ===========\n",
      "Run episode 214 with rewards -500.0\n",
      "============ 215 ===========\n",
      "Run episode 215 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 216 ===========\n",
      "Run episode 216 with rewards -500.0\n",
      "============ 217 ===========\n",
      "Run episode 217 with rewards -500.0\n",
      "============ 218 ===========\n",
      "Run episode 218 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 219 ===========\n",
      "Run episode 219 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 220 ===========\n",
      "Run episode 220 with rewards -500.0\n",
      "============ 221 ===========\n",
      "Run episode 221 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 222 ===========\n",
      "Run episode 222 with rewards -500.0\n",
      "============ 223 ===========\n",
      "Run episode 223 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 224 ===========\n",
      "Run episode 224 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 225 ===========\n",
      "Run episode 225 with rewards -500.0\n",
      "============ 226 ===========\n",
      "Run episode 226 with rewards -500.0\n",
      "============ 227 ===========\n",
      "Run episode 227 with rewards -450.0\n",
      "============ 228 ===========\n",
      "Run episode 228 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 229 ===========\n",
      "Run episode 229 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 230 ===========\n",
      "Run episode 230 with rewards -500.0\n",
      "============ 231 ===========\n",
      "Run episode 231 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 232 ===========\n",
      "Run episode 232 with rewards -100.0\n",
      "============ 233 ===========\n",
      "Run episode 233 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 234 ===========\n",
      "Run episode 234 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 235 ===========\n",
      "Run episode 235 with rewards -500.0\n",
      "============ 236 ===========\n",
      "Run episode 236 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 237 ===========\n",
      "Run episode 237 with rewards -435.0\n",
      "============ 238 ===========\n",
      "Run episode 238 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 239 ===========\n",
      "Run episode 239 with rewards -410.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 240 ===========\n",
      "Run episode 240 with rewards -500.0\n",
      "============ 241 ===========\n",
      "Run episode 241 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 242 ===========\n",
      "Run episode 242 with rewards -166.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 243 ===========\n",
      "Run episode 243 with rewards -500.0\n",
      "============ 244 ===========\n",
      "Run episode 244 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 245 ===========\n",
      "Run episode 245 with rewards -432.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 246 ===========\n",
      "Run episode 246 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 247 ===========\n",
      "Run episode 247 with rewards -500.0\n",
      "============ 248 ===========\n",
      "Run episode 248 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 249 ===========\n",
      "Run episode 249 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 250 ===========\n",
      "Run episode 250 with rewards -500.0\n",
      "============ 251 ===========\n",
      "Run episode 251 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 252 ===========\n",
      "Run episode 252 with rewards -432.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 253 ===========\n",
      "Run episode 253 with rewards -500.0\n",
      "============ 254 ===========\n",
      "Run episode 254 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 255 ===========\n",
      "Run episode 255 with rewards -407.0\n",
      "============ 256 ===========\n",
      "Run episode 256 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 257 ===========\n",
      "Run episode 257 with rewards -352.0\n",
      "============ 258 ===========\n",
      "Run episode 258 with rewards -500.0\n",
      "============ 259 ===========\n",
      "Run episode 259 with rewards -500.0\n",
      "============ 260 ===========\n",
      "Run episode 260 with rewards -500.0\n",
      "============ 261 ===========\n",
      "Run episode 261 with rewards -500.0\n",
      "============ 262 ===========\n",
      "Run episode 262 with rewards -500.0\n",
      "============ 263 ===========\n",
      "Run episode 263 with rewards -236.0\n",
      "============ 264 ===========\n",
      "Run episode 264 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 265 ===========\n",
      "Run episode 265 with rewards -500.0\n",
      "============ 266 ===========\n",
      "Run episode 266 with rewards -500.0\n",
      "============ 267 ===========\n",
      "Run episode 267 with rewards -500.0\n",
      "============ 268 ===========\n",
      "Run episode 268 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 269 ===========\n",
      "Run episode 269 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 270 ===========\n",
      "Run episode 270 with rewards -500.0\n",
      "============ 271 ===========\n",
      "Run episode 271 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 272 ===========\n",
      "Run episode 272 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 273 ===========\n",
      "Run episode 273 with rewards -500.0\n",
      "============ 274 ===========\n",
      "Run episode 274 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 275 ===========\n",
      "Run episode 275 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 276 ===========\n",
      "Run episode 276 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 277 ===========\n",
      "Run episode 277 with rewards -500.0\n",
      "============ 278 ===========\n",
      "Run episode 278 with rewards -500.0\n",
      "============ 279 ===========\n",
      "Run episode 279 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 280 ===========\n",
      "Run episode 280 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 281 ===========\n",
      "Run episode 281 with rewards -500.0\n",
      "============ 282 ===========\n",
      "Run episode 282 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 283 ===========\n",
      "Run episode 283 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 284 ===========\n",
      "Run episode 284 with rewards -500.0\n",
      "============ 285 ===========\n",
      "Run episode 285 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 286 ===========\n",
      "Run episode 286 with rewards -500.0\n",
      "============ 287 ===========\n",
      "Run episode 287 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 288 ===========\n",
      "Run episode 288 with rewards -500.0\n",
      "============ 289 ===========\n",
      "Run episode 289 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 290 ===========\n",
      "Run episode 290 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 291 ===========\n",
      "Run episode 291 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 292 ===========\n",
      "Run episode 292 with rewards -500.0\n",
      "============ 293 ===========\n",
      "Run episode 293 with rewards -424.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 294 ===========\n",
      "Run episode 294 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 295 ===========\n",
      "Run episode 295 with rewards -500.0\n",
      "============ 296 ===========\n",
      "Run episode 296 with rewards -500.0\n",
      "============ 297 ===========\n",
      "Run episode 297 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 298 ===========\n",
      "Run episode 298 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 299 ===========\n",
      "Run episode 299 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 300 ===========\n",
      "Run episode 300 with rewards -500.0\n",
      "============ 301 ===========\n",
      "Run episode 301 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 302 ===========\n",
      "Run episode 302 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 303 ===========\n",
      "Run episode 303 with rewards -500.0\n",
      "============ 304 ===========\n",
      "Run episode 304 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 305 ===========\n",
      "Run episode 305 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 306 ===========\n",
      "Run episode 306 with rewards -500.0\n",
      "============ 307 ===========\n",
      "Run episode 307 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 308 ===========\n",
      "Run episode 308 with rewards -500.0\n",
      "============ 309 ===========\n",
      "Run episode 309 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 310 ===========\n",
      "Run episode 310 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 311 ===========\n",
      "Run episode 311 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 312 ===========\n",
      "Run episode 312 with rewards -500.0\n",
      "============ 313 ===========\n",
      "Run episode 313 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 314 ===========\n",
      "Run episode 314 with rewards -500.0\n",
      "============ 315 ===========\n",
      "Run episode 315 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 316 ===========\n",
      "Run episode 316 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 317 ===========\n",
      "Run episode 317 with rewards -500.0\n",
      "============ 318 ===========\n",
      "Run episode 318 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 319 ===========\n",
      "Run episode 319 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 320 ===========\n",
      "Run episode 320 with rewards -500.0\n",
      "============ 321 ===========\n",
      "Run episode 321 with rewards -254.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 322 ===========\n",
      "Run episode 322 with rewards -500.0\n",
      "============ 323 ===========\n",
      "Run episode 323 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 324 ===========\n",
      "Run episode 324 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 325 ===========\n",
      "Run episode 325 with rewards -500.0\n",
      "============ 326 ===========\n",
      "Run episode 326 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 327 ===========\n",
      "Run episode 327 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 328 ===========\n",
      "Run episode 328 with rewards -500.0\n",
      "============ 329 ===========\n",
      "Run episode 329 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 330 ===========\n",
      "Run episode 330 with rewards -500.0\n",
      "============ 331 ===========\n",
      "Run episode 331 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 332 ===========\n",
      "Run episode 332 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 333 ===========\n",
      "Run episode 333 with rewards -500.0\n",
      "============ 334 ===========\n",
      "Run episode 334 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 335 ===========\n",
      "Run episode 335 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 336 ===========\n",
      "Run episode 336 with rewards -500.0\n",
      "============ 337 ===========\n",
      "Run episode 337 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 338 ===========\n",
      "Run episode 338 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 339 ===========\n",
      "Run episode 339 with rewards -500.0\n",
      "============ 340 ===========\n",
      "Run episode 340 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 341 ===========\n",
      "Run episode 341 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 342 ===========\n",
      "Run episode 342 with rewards -190.0\n",
      "============ 343 ===========\n",
      "Run episode 343 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 344 ===========\n",
      "Run episode 344 with rewards -500.0\n",
      "============ 345 ===========\n",
      "Run episode 345 with rewards -444.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 346 ===========\n",
      "Run episode 346 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 347 ===========\n",
      "Run episode 347 with rewards -196.0\n",
      "============ 348 ===========\n",
      "Run episode 348 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 349 ===========\n",
      "Run episode 349 with rewards -500.0\n",
      "============ 350 ===========\n",
      "Run episode 350 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 351 ===========\n",
      "Run episode 351 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 352 ===========\n",
      "Run episode 352 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 353 ===========\n",
      "Run episode 353 with rewards -192.0\n",
      "============ 354 ===========\n",
      "Run episode 354 with rewards -500.0\n",
      "============ 355 ===========\n",
      "Run episode 355 with rewards -500.0\n",
      "============ 356 ===========\n",
      "Run episode 356 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 357 ===========\n",
      "Run episode 357 with rewards -201.0\n",
      "============ 358 ===========\n",
      "Run episode 358 with rewards -500.0\n",
      "============ 359 ===========\n",
      "Run episode 359 with rewards -500.0\n",
      "============ 360 ===========\n",
      "Run episode 360 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 361 ===========\n",
      "Run episode 361 with rewards -342.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 362 ===========\n",
      "Run episode 362 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 363 ===========\n",
      "Run episode 363 with rewards -500.0\n",
      "============ 364 ===========\n",
      "Run episode 364 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 365 ===========\n",
      "Run episode 365 with rewards -124.0\n",
      "============ 366 ===========\n",
      "Run episode 366 with rewards -500.0\n",
      "============ 367 ===========\n",
      "Run episode 367 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 368 ===========\n",
      "Run episode 368 with rewards -500.0\n",
      "============ 369 ===========\n",
      "Run episode 369 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 370 ===========\n",
      "Run episode 370 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 371 ===========\n",
      "Run episode 371 with rewards -500.0\n",
      "============ 372 ===========\n",
      "Run episode 372 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 373 ===========\n",
      "Run episode 373 with rewards -72.0\n",
      "best\n",
      "============ 374 ===========\n",
      "Run episode 374 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 375 ===========\n",
      "Run episode 375 with rewards -249.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 376 ===========\n",
      "Run episode 376 with rewards -500.0\n",
      "============ 377 ===========\n",
      "Run episode 377 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 378 ===========\n",
      "Run episode 378 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 379 ===========\n",
      "Run episode 379 with rewards -500.0\n",
      "============ 380 ===========\n",
      "Run episode 380 with rewards -500.0\n",
      "============ 381 ===========\n",
      "Run episode 381 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 382 ===========\n",
      "Run episode 382 with rewards -262.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 383 ===========\n",
      "Run episode 383 with rewards -500.0\n",
      "============ 384 ===========\n",
      "Run episode 384 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 385 ===========\n",
      "Run episode 385 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 386 ===========\n",
      "Run episode 386 with rewards -500.0\n",
      "============ 387 ===========\n",
      "Run episode 387 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 388 ===========\n",
      "Run episode 388 with rewards -500.0\n",
      "============ 389 ===========\n",
      "Run episode 389 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 390 ===========\n",
      "Run episode 390 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 391 ===========\n",
      "Run episode 391 with rewards -99.0\n",
      "============ 392 ===========\n",
      "Run episode 392 with rewards -500.0\n",
      "origin 0.035556554918226714 new 0.03555655492080532 not updated\n",
      "============ 393 ===========\n",
      "Run episode 393 with rewards -102.0\n",
      "============ 394 ===========\n",
      "Run episode 394 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 395 ===========\n",
      "Run episode 395 with rewards -500.0\n",
      "============ 396 ===========\n",
      "Run episode 396 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 397 ===========\n",
      "Run episode 397 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 398 ===========\n",
      "Run episode 398 with rewards -500.0\n",
      "============ 399 ===========\n",
      "Run episode 399 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 400 ===========\n",
      "Run episode 400 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 401 ===========\n",
      "Run episode 401 with rewards -500.0\n",
      "============ 402 ===========\n",
      "Run episode 402 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 403 ===========\n",
      "Run episode 403 with rewards -500.0\n",
      "============ 404 ===========\n",
      "Run episode 404 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 405 ===========\n",
      "Run episode 405 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 406 ===========\n",
      "Run episode 406 with rewards -500.0\n",
      "============ 407 ===========\n",
      "Run episode 407 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 408 ===========\n",
      "Run episode 408 with rewards -500.0\n",
      "============ 409 ===========\n",
      "Run episode 409 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 410 ===========\n",
      "Run episode 410 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 411 ===========\n",
      "Run episode 411 with rewards -500.0\n",
      "============ 412 ===========\n",
      "Run episode 412 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 413 ===========\n",
      "Run episode 413 with rewards -500.0\n",
      "============ 414 ===========\n",
      "Run episode 414 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 415 ===========\n",
      "Run episode 415 with rewards -244.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 416 ===========\n",
      "Run episode 416 with rewards -500.0\n",
      "============ 417 ===========\n",
      "Run episode 417 with rewards -145.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 418 ===========\n",
      "Run episode 418 with rewards -500.0\n",
      "============ 419 ===========\n",
      "Run episode 419 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 420 ===========\n",
      "Run episode 420 with rewards -500.0\n",
      "============ 421 ===========\n",
      "Run episode 421 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 422 ===========\n",
      "Run episode 422 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 423 ===========\n",
      "Run episode 423 with rewards -500.0\n",
      "============ 424 ===========\n",
      "Run episode 424 with rewards -100.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 425 ===========\n",
      "Run episode 425 with rewards -500.0\n",
      "============ 426 ===========\n",
      "Run episode 426 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 427 ===========\n",
      "Run episode 427 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 428 ===========\n",
      "Run episode 428 with rewards -500.0\n",
      "============ 429 ===========\n",
      "Run episode 429 with rewards -211.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 430 ===========\n",
      "Run episode 430 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 431 ===========\n",
      "Run episode 431 with rewards -500.0\n",
      "============ 432 ===========\n",
      "Run episode 432 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 433 ===========\n",
      "Run episode 433 with rewards -500.0\n",
      "============ 434 ===========\n",
      "Run episode 434 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 435 ===========\n",
      "Run episode 435 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 436 ===========\n",
      "Run episode 436 with rewards -500.0\n",
      "============ 437 ===========\n",
      "Run episode 437 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 438 ===========\n",
      "Run episode 438 with rewards -500.0\n",
      "============ 439 ===========\n",
      "Run episode 439 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 440 ===========\n",
      "Run episode 440 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 441 ===========\n",
      "Run episode 441 with rewards -500.0\n",
      "============ 442 ===========\n",
      "Run episode 442 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 443 ===========\n",
      "Run episode 443 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 444 ===========\n",
      "Run episode 444 with rewards -500.0\n",
      "============ 445 ===========\n",
      "Run episode 445 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 446 ===========\n",
      "Run episode 446 with rewards -500.0\n",
      "============ 447 ===========\n",
      "Run episode 447 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 448 ===========\n",
      "Run episode 448 with rewards -500.0\n",
      "============ 449 ===========\n",
      "Run episode 449 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 450 ===========\n",
      "Run episode 450 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 451 ===========\n",
      "Run episode 451 with rewards -500.0\n",
      "============ 452 ===========\n",
      "Run episode 452 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 453 ===========\n",
      "Run episode 453 with rewards -472.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 454 ===========\n",
      "Run episode 454 with rewards -500.0\n",
      "============ 455 ===========\n",
      "Run episode 455 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 456 ===========\n",
      "Run episode 456 with rewards -500.0\n",
      "============ 457 ===========\n",
      "Run episode 457 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 458 ===========\n",
      "Run episode 458 with rewards -500.0\n",
      "============ 459 ===========\n",
      "Run episode 459 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 460 ===========\n",
      "Run episode 460 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 461 ===========\n",
      "Run episode 461 with rewards -500.0\n",
      "============ 462 ===========\n",
      "Run episode 462 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 463 ===========\n",
      "Run episode 463 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 464 ===========\n",
      "Run episode 464 with rewards -500.0\n",
      "============ 465 ===========\n",
      "Run episode 465 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 466 ===========\n",
      "Run episode 466 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 467 ===========\n",
      "Run episode 467 with rewards -500.0\n",
      "============ 468 ===========\n",
      "Run episode 468 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 469 ===========\n",
      "Run episode 469 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 470 ===========\n",
      "Run episode 470 with rewards -500.0\n",
      "============ 471 ===========\n",
      "Run episode 471 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 472 ===========\n",
      "Run episode 472 with rewards -500.0\n",
      "============ 473 ===========\n",
      "Run episode 473 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 474 ===========\n",
      "Run episode 474 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 475 ===========\n",
      "Run episode 475 with rewards -500.0\n",
      "============ 476 ===========\n",
      "Run episode 476 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 477 ===========\n",
      "Run episode 477 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 478 ===========\n",
      "Run episode 478 with rewards -500.0\n",
      "============ 479 ===========\n",
      "Run episode 479 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 480 ===========\n",
      "Run episode 480 with rewards -482.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 481 ===========\n",
      "Run episode 481 with rewards -255.0\n",
      "============ 482 ===========\n",
      "Run episode 482 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 483 ===========\n",
      "Run episode 483 with rewards -500.0\n",
      "============ 484 ===========\n",
      "Run episode 484 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 485 ===========\n",
      "Run episode 485 with rewards -160.0\n",
      "============ 486 ===========\n",
      "Run episode 486 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 487 ===========\n",
      "Run episode 487 with rewards -500.0\n",
      "============ 488 ===========\n",
      "Run episode 488 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 489 ===========\n",
      "Run episode 489 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 490 ===========\n",
      "Run episode 490 with rewards -500.0\n",
      "============ 491 ===========\n",
      "Run episode 491 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 492 ===========\n",
      "Run episode 492 with rewards -148.0\n",
      "============ 493 ===========\n",
      "Run episode 493 with rewards -500.0\n",
      "============ 494 ===========\n",
      "Run episode 494 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 495 ===========\n",
      "Run episode 495 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 496 ===========\n",
      "Run episode 496 with rewards -500.0\n",
      "============ 497 ===========\n",
      "Run episode 497 with rewards -467.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 498 ===========\n",
      "Run episode 498 with rewards -500.0\n",
      "============ 499 ===========\n",
      "Run episode 499 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 500 ===========\n",
      "Run episode 500 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 501 ===========\n",
      "Run episode 501 with rewards -500.0\n",
      "============ 502 ===========\n",
      "Run episode 502 with rewards -500.0\n",
      "origin 0.03228583140020993 new 0.032285831400244526 not updated\n",
      "============ 503 ===========\n",
      "Run episode 503 with rewards -500.0\n",
      "============ 504 ===========\n",
      "Run episode 504 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 505 ===========\n",
      "Run episode 505 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 506 ===========\n",
      "Run episode 506 with rewards -500.0\n",
      "============ 507 ===========\n",
      "Run episode 507 with rewards -500.0\n",
      "============ 508 ===========\n",
      "Run episode 508 with rewards -500.0\n",
      "============ 509 ===========\n",
      "Run episode 509 with rewards -500.0\n",
      "origin 0.03208034807586339 new 0.03208034807605736 not updated\n",
      "============ 510 ===========\n",
      "Run episode 510 with rewards -500.0\n",
      "============ 511 ===========\n",
      "Run episode 511 with rewards -500.0\n",
      "origin 0.032024585052426634 new 0.032024585053923665 not updated\n",
      "============ 512 ===========\n",
      "Run episode 512 with rewards -500.0\n",
      "============ 513 ===========\n",
      "Run episode 513 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 514 ===========\n",
      "Run episode 514 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 515 ===========\n",
      "Run episode 515 with rewards -500.0\n",
      "============ 516 ===========\n",
      "Run episode 516 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 517 ===========\n",
      "Run episode 517 with rewards -500.0\n",
      "============ 518 ===========\n",
      "Run episode 518 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 519 ===========\n",
      "Run episode 519 with rewards -369.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 520 ===========\n",
      "Run episode 520 with rewards -500.0\n",
      "============ 521 ===========\n",
      "Run episode 521 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 522 ===========\n",
      "Run episode 522 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 523 ===========\n",
      "Run episode 523 with rewards -500.0\n",
      "============ 524 ===========\n",
      "Run episode 524 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 525 ===========\n",
      "Run episode 525 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 526 ===========\n",
      "Run episode 526 with rewards -500.0\n",
      "============ 527 ===========\n",
      "Run episode 527 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 528 ===========\n",
      "Run episode 528 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 529 ===========\n",
      "Run episode 529 with rewards -500.0\n",
      "============ 530 ===========\n",
      "Run episode 530 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 531 ===========\n",
      "Run episode 531 with rewards -95.0\n",
      "============ 532 ===========\n",
      "Run episode 532 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 533 ===========\n",
      "Run episode 533 with rewards -500.0\n",
      "============ 534 ===========\n",
      "Run episode 534 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 535 ===========\n",
      "Run episode 535 with rewards -279.0\n",
      "============ 536 ===========\n",
      "Run episode 536 with rewards -102.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 537 ===========\n",
      "Run episode 537 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 538 ===========\n",
      "Run episode 538 with rewards -500.0\n",
      "============ 539 ===========\n",
      "Run episode 539 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 540 ===========\n",
      "Run episode 540 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 541 ===========\n",
      "Run episode 541 with rewards -500.0\n",
      "============ 542 ===========\n",
      "Run episode 542 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 543 ===========\n",
      "Run episode 543 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 544 ===========\n",
      "Run episode 544 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 545 ===========\n",
      "Run episode 545 with rewards -500.0\n",
      "============ 546 ===========\n",
      "Run episode 546 with rewards -500.0\n",
      "============ 547 ===========\n",
      "Run episode 547 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 548 ===========\n",
      "Run episode 548 with rewards -162.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 549 ===========\n",
      "Run episode 549 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 550 ===========\n",
      "Run episode 550 with rewards -500.0\n",
      "============ 551 ===========\n",
      "Run episode 551 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 552 ===========\n",
      "Run episode 552 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 553 ===========\n",
      "Run episode 553 with rewards -500.0\n",
      "============ 554 ===========\n",
      "Run episode 554 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 555 ===========\n",
      "Run episode 555 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 556 ===========\n",
      "Run episode 556 with rewards -500.0\n",
      "============ 557 ===========\n",
      "Run episode 557 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 558 ===========\n",
      "Run episode 558 with rewards -94.0\n",
      "============ 559 ===========\n",
      "Run episode 559 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 560 ===========\n",
      "Run episode 560 with rewards -500.0\n",
      "============ 561 ===========\n",
      "Run episode 561 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 562 ===========\n",
      "Run episode 562 with rewards -300.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 563 ===========\n",
      "Run episode 563 with rewards -500.0\n",
      "============ 564 ===========\n",
      "Run episode 564 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 565 ===========\n",
      "Run episode 565 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 566 ===========\n",
      "Run episode 566 with rewards -500.0\n",
      "============ 567 ===========\n",
      "Run episode 567 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 568 ===========\n",
      "Run episode 568 with rewards -500.0\n",
      "============ 569 ===========\n",
      "Run episode 569 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 570 ===========\n",
      "Run episode 570 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 571 ===========\n",
      "Run episode 571 with rewards -500.0\n",
      "============ 572 ===========\n",
      "Run episode 572 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 573 ===========\n",
      "Run episode 573 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 574 ===========\n",
      "Run episode 574 with rewards -224.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 575 ===========\n",
      "Run episode 575 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 576 ===========\n",
      "Run episode 576 with rewards -500.0\n",
      "============ 577 ===========\n",
      "Run episode 577 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 578 ===========\n",
      "Run episode 578 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 579 ===========\n",
      "Run episode 579 with rewards -500.0\n",
      "============ 580 ===========\n",
      "Run episode 580 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 581 ===========\n",
      "Run episode 581 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 582 ===========\n",
      "Run episode 582 with rewards -500.0\n",
      "============ 583 ===========\n",
      "Run episode 583 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 584 ===========\n",
      "Run episode 584 with rewards -500.0\n",
      "============ 585 ===========\n",
      "Run episode 585 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 586 ===========\n",
      "Run episode 586 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 587 ===========\n",
      "Run episode 587 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 588 ===========\n",
      "Run episode 588 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 589 ===========\n",
      "Run episode 589 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 590 ===========\n",
      "Run episode 590 with rewards -500.0\n",
      "============ 591 ===========\n",
      "Run episode 591 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 592 ===========\n",
      "Run episode 592 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 593 ===========\n",
      "Run episode 593 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 594 ===========\n",
      "Run episode 594 with rewards -500.0\n",
      "============ 595 ===========\n",
      "Run episode 595 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 596 ===========\n",
      "Run episode 596 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 597 ===========\n",
      "Run episode 597 with rewards -500.0\n",
      "============ 598 ===========\n",
      "Run episode 598 with rewards -190.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 599 ===========\n",
      "Run episode 599 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 600 ===========\n",
      "Run episode 600 with rewards -500.0\n",
      "============ 601 ===========\n",
      "Run episode 601 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 602 ===========\n",
      "Run episode 602 with rewards -500.0\n",
      "============ 603 ===========\n",
      "Run episode 603 with rewards -500.0\n",
      "============ 604 ===========\n",
      "Run episode 604 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 605 ===========\n",
      "Run episode 605 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 606 ===========\n",
      "Run episode 606 with rewards -500.0\n",
      "============ 607 ===========\n",
      "Run episode 607 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 608 ===========\n",
      "Run episode 608 with rewards -500.0\n",
      "============ 609 ===========\n",
      "Run episode 609 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 610 ===========\n",
      "Run episode 610 with rewards -500.0\n",
      "============ 611 ===========\n",
      "Run episode 611 with rewards -500.0\n",
      "============ 612 ===========\n",
      "Run episode 612 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 613 ===========\n",
      "Run episode 613 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 614 ===========\n",
      "Run episode 614 with rewards -500.0\n",
      "============ 615 ===========\n",
      "Run episode 615 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 616 ===========\n",
      "Run episode 616 with rewards -500.0\n",
      "============ 617 ===========\n",
      "Run episode 617 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 618 ===========\n",
      "Run episode 618 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 619 ===========\n",
      "Run episode 619 with rewards -500.0\n",
      "============ 620 ===========\n",
      "Run episode 620 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 621 ===========\n",
      "Run episode 621 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 622 ===========\n",
      "Run episode 622 with rewards -500.0\n",
      "============ 623 ===========\n",
      "Run episode 623 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 624 ===========\n",
      "Run episode 624 with rewards -500.0\n",
      "============ 625 ===========\n",
      "Run episode 625 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 626 ===========\n",
      "Run episode 626 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 627 ===========\n",
      "Run episode 627 with rewards -500.0\n",
      "============ 628 ===========\n",
      "Run episode 628 with rewards -358.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 629 ===========\n",
      "Run episode 629 with rewards -500.0\n",
      "============ 630 ===========\n",
      "Run episode 630 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 631 ===========\n",
      "Run episode 631 with rewards -500.0\n",
      "============ 632 ===========\n",
      "Run episode 632 with rewards -500.0\n",
      "============ 633 ===========\n",
      "Run episode 633 with rewards -500.0\n",
      "============ 634 ===========\n",
      "Run episode 634 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 635 ===========\n",
      "Run episode 635 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 636 ===========\n",
      "Run episode 636 with rewards -500.0\n",
      "============ 637 ===========\n",
      "Run episode 637 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 638 ===========\n",
      "Run episode 638 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 639 ===========\n",
      "Run episode 639 with rewards -500.0\n",
      "============ 640 ===========\n",
      "Run episode 640 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 641 ===========\n",
      "Run episode 641 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 642 ===========\n",
      "Run episode 642 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 643 ===========\n",
      "Run episode 643 with rewards -500.0\n",
      "============ 644 ===========\n",
      "Run episode 644 with rewards -215.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 645 ===========\n",
      "Run episode 645 with rewards -266.0\n",
      "============ 646 ===========\n",
      "Run episode 646 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 647 ===========\n",
      "Run episode 647 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 648 ===========\n",
      "Run episode 648 with rewards -140.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 649 ===========\n",
      "Run episode 649 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 650 ===========\n",
      "Run episode 650 with rewards -500.0\n",
      "============ 651 ===========\n",
      "Run episode 651 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 652 ===========\n",
      "Run episode 652 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 653 ===========\n",
      "Run episode 653 with rewards -500.0\n",
      "============ 654 ===========\n",
      "Run episode 654 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 655 ===========\n",
      "Run episode 655 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 656 ===========\n",
      "Run episode 656 with rewards -500.0\n",
      "============ 657 ===========\n",
      "Run episode 657 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 658 ===========\n",
      "Run episode 658 with rewards -147.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 659 ===========\n",
      "Run episode 659 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 660 ===========\n",
      "Run episode 660 with rewards -500.0\n",
      "============ 661 ===========\n",
      "Run episode 661 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 662 ===========\n",
      "Run episode 662 with rewards -88.0\n",
      "============ 663 ===========\n",
      "Run episode 663 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 664 ===========\n",
      "Run episode 664 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 665 ===========\n",
      "Run episode 665 with rewards -500.0\n",
      "============ 666 ===========\n",
      "Run episode 666 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 667 ===========\n",
      "Run episode 667 with rewards -79.0\n",
      "============ 668 ===========\n",
      "Run episode 668 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 669 ===========\n",
      "Run episode 669 with rewards -101.0\n",
      "============ 670 ===========\n",
      "Run episode 670 with rewards -500.0\n",
      "============ 671 ===========\n",
      "Run episode 671 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 672 ===========\n",
      "Run episode 672 with rewards -500.0\n",
      "============ 673 ===========\n",
      "Run episode 673 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 674 ===========\n",
      "Run episode 674 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 675 ===========\n",
      "Run episode 675 with rewards -500.0\n",
      "============ 676 ===========\n",
      "Run episode 676 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 677 ===========\n",
      "Run episode 677 with rewards -500.0\n",
      "============ 678 ===========\n",
      "Run episode 678 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 679 ===========\n",
      "Run episode 679 with rewards -179.0\n",
      "============ 680 ===========\n",
      "Run episode 680 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 681 ===========\n",
      "Run episode 681 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 682 ===========\n",
      "Run episode 682 with rewards -500.0\n",
      "============ 683 ===========\n",
      "Run episode 683 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 684 ===========\n",
      "Run episode 684 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 685 ===========\n",
      "Run episode 685 with rewards -500.0\n",
      "============ 686 ===========\n",
      "Run episode 686 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 687 ===========\n",
      "Run episode 687 with rewards -269.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 688 ===========\n",
      "Run episode 688 with rewards -500.0\n",
      "============ 689 ===========\n",
      "Run episode 689 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 690 ===========\n",
      "Run episode 690 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 691 ===========\n",
      "Run episode 691 with rewards -500.0\n",
      "============ 692 ===========\n",
      "Run episode 692 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 693 ===========\n",
      "Run episode 693 with rewards -210.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 694 ===========\n",
      "Run episode 694 with rewards -500.0\n",
      "============ 695 ===========\n",
      "Run episode 695 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 696 ===========\n",
      "Run episode 696 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 697 ===========\n",
      "Run episode 697 with rewards -500.0\n",
      "============ 698 ===========\n",
      "Run episode 698 with rewards -500.0\n",
      "============ 699 ===========\n",
      "Run episode 699 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 700 ===========\n",
      "Run episode 700 with rewards -500.0\n",
      "============ 701 ===========\n",
      "Run episode 701 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 702 ===========\n",
      "Run episode 702 with rewards -238.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 703 ===========\n",
      "Run episode 703 with rewards -500.0\n",
      "============ 704 ===========\n",
      "Run episode 704 with rewards -500.0\n",
      "============ 705 ===========\n",
      "Run episode 705 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 706 ===========\n",
      "Run episode 706 with rewards -500.0\n",
      "============ 707 ===========\n",
      "Run episode 707 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 708 ===========\n",
      "Run episode 708 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 709 ===========\n",
      "Run episode 709 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 710 ===========\n",
      "Run episode 710 with rewards -500.0\n",
      "============ 711 ===========\n",
      "Run episode 711 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 712 ===========\n",
      "Run episode 712 with rewards -500.0\n",
      "============ 713 ===========\n",
      "Run episode 713 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 714 ===========\n",
      "Run episode 714 with rewards -376.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 715 ===========\n",
      "Run episode 715 with rewards -500.0\n",
      "============ 716 ===========\n",
      "Run episode 716 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 717 ===========\n",
      "Run episode 717 with rewards -500.0\n",
      "============ 718 ===========\n",
      "Run episode 718 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 719 ===========\n",
      "Run episode 719 with rewards -500.0\n",
      "============ 720 ===========\n",
      "Run episode 720 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 721 ===========\n",
      "Run episode 721 with rewards -500.0\n",
      "============ 722 ===========\n",
      "Run episode 722 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 723 ===========\n",
      "Run episode 723 with rewards -500.0\n",
      "============ 724 ===========\n",
      "Run episode 724 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 725 ===========\n",
      "Run episode 725 with rewards -500.0\n",
      "============ 726 ===========\n",
      "Run episode 726 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 727 ===========\n",
      "Run episode 727 with rewards -500.0\n",
      "============ 728 ===========\n",
      "Run episode 728 with rewards -500.0\n",
      "============ 729 ===========\n",
      "Run episode 729 with rewards -500.0\n",
      "============ 730 ===========\n",
      "Run episode 730 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 731 ===========\n",
      "Run episode 731 with rewards -500.0\n",
      "============ 732 ===========\n",
      "Run episode 732 with rewards -500.0\n",
      "============ 733 ===========\n",
      "Run episode 733 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 734 ===========\n",
      "Run episode 734 with rewards -500.0\n",
      "============ 735 ===========\n",
      "Run episode 735 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 736 ===========\n",
      "Run episode 736 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 737 ===========\n",
      "Run episode 737 with rewards -500.0\n",
      "============ 738 ===========\n",
      "Run episode 738 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 739 ===========\n",
      "Run episode 739 with rewards -500.0\n",
      "============ 740 ===========\n",
      "Run episode 740 with rewards -500.0\n",
      "============ 741 ===========\n",
      "Run episode 741 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 742 ===========\n",
      "Run episode 742 with rewards -500.0\n",
      "============ 743 ===========\n",
      "Run episode 743 with rewards -319.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 744 ===========\n",
      "Run episode 744 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 745 ===========\n",
      "Run episode 745 with rewards -500.0\n",
      "============ 746 ===========\n",
      "Run episode 746 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 747 ===========\n",
      "Run episode 747 with rewards -389.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 748 ===========\n",
      "Run episode 748 with rewards -500.0\n",
      "============ 749 ===========\n",
      "Run episode 749 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 750 ===========\n",
      "Run episode 750 with rewards -454.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 751 ===========\n",
      "Run episode 751 with rewards -500.0\n",
      "============ 752 ===========\n",
      "Run episode 752 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 753 ===========\n",
      "Run episode 753 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 754 ===========\n",
      "Run episode 754 with rewards -146.0\n",
      "============ 755 ===========\n",
      "Run episode 755 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 756 ===========\n",
      "Run episode 756 with rewards -500.0\n",
      "============ 757 ===========\n",
      "Run episode 757 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 758 ===========\n",
      "Run episode 758 with rewards -193.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 759 ===========\n",
      "Run episode 759 with rewards -500.0\n",
      "============ 760 ===========\n",
      "Run episode 760 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 761 ===========\n",
      "Run episode 761 with rewards -500.0\n",
      "============ 762 ===========\n",
      "Run episode 762 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 763 ===========\n",
      "Run episode 763 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 764 ===========\n",
      "Run episode 764 with rewards -129.0\n",
      "============ 765 ===========\n",
      "Run episode 765 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 766 ===========\n",
      "Run episode 766 with rewards -100.0\n",
      "============ 767 ===========\n",
      "Run episode 767 with rewards -500.0\n",
      "============ 768 ===========\n",
      "Run episode 768 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 769 ===========\n",
      "Run episode 769 with rewards -500.0\n",
      "============ 770 ===========\n",
      "Run episode 770 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 771 ===========\n",
      "Run episode 771 with rewards -154.0\n",
      "============ 772 ===========\n",
      "Run episode 772 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 773 ===========\n",
      "Run episode 773 with rewards -193.0\n",
      "============ 774 ===========\n",
      "Run episode 774 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 775 ===========\n",
      "Run episode 775 with rewards -354.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 776 ===========\n",
      "Run episode 776 with rewards -500.0\n",
      "============ 777 ===========\n",
      "Run episode 777 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 778 ===========\n",
      "Run episode 778 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 779 ===========\n",
      "Run episode 779 with rewards -500.0\n",
      "============ 780 ===========\n",
      "Run episode 780 with rewards -187.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 781 ===========\n",
      "Run episode 781 with rewards -175.0\n",
      "============ 782 ===========\n",
      "Run episode 782 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 783 ===========\n",
      "Run episode 783 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 784 ===========\n",
      "Run episode 784 with rewards -500.0\n",
      "============ 785 ===========\n",
      "Run episode 785 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 786 ===========\n",
      "Run episode 786 with rewards -500.0\n",
      "============ 787 ===========\n",
      "Run episode 787 with rewards -294.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 788 ===========\n",
      "Run episode 788 with rewards -500.0\n",
      "============ 789 ===========\n",
      "Run episode 789 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 790 ===========\n",
      "Run episode 790 with rewards -203.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 791 ===========\n",
      "Run episode 791 with rewards -500.0\n",
      "============ 792 ===========\n",
      "Run episode 792 with rewards -467.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 793 ===========\n",
      "Run episode 793 with rewards -191.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 794 ===========\n",
      "Run episode 794 with rewards -500.0\n",
      "============ 795 ===========\n",
      "Run episode 795 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 796 ===========\n",
      "Run episode 796 with rewards -500.0\n",
      "============ 797 ===========\n",
      "Run episode 797 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 798 ===========\n",
      "Run episode 798 with rewards -102.0\n",
      "============ 799 ===========\n",
      "Run episode 799 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 800 ===========\n",
      "Run episode 800 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 801 ===========\n",
      "Run episode 801 with rewards -500.0\n",
      "============ 802 ===========\n",
      "Run episode 802 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 803 ===========\n",
      "Run episode 803 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 804 ===========\n",
      "Run episode 804 with rewards -500.0\n",
      "============ 805 ===========\n",
      "Run episode 805 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 806 ===========\n",
      "Run episode 806 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 807 ===========\n",
      "Run episode 807 with rewards -331.0\n",
      "============ 808 ===========\n",
      "Run episode 808 with rewards -412.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 809 ===========\n",
      "Run episode 809 with rewards -500.0\n",
      "============ 810 ===========\n",
      "Run episode 810 with rewards -500.0\n",
      "============ 811 ===========\n",
      "Run episode 811 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 812 ===========\n",
      "Run episode 812 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 813 ===========\n",
      "Run episode 813 with rewards -500.0\n",
      "============ 814 ===========\n",
      "Run episode 814 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 815 ===========\n",
      "Run episode 815 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 816 ===========\n",
      "Run episode 816 with rewards -500.0\n",
      "============ 817 ===========\n",
      "Run episode 817 with rewards -472.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 818 ===========\n",
      "Run episode 818 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 819 ===========\n",
      "Run episode 819 with rewards -500.0\n",
      "============ 820 ===========\n",
      "Run episode 820 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 821 ===========\n",
      "Run episode 821 with rewards -203.0\n",
      "============ 822 ===========\n",
      "Run episode 822 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 823 ===========\n",
      "Run episode 823 with rewards -500.0\n",
      "============ 824 ===========\n",
      "Run episode 824 with rewards -500.0\n",
      "============ 825 ===========\n",
      "Run episode 825 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 826 ===========\n",
      "Run episode 826 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 827 ===========\n",
      "Run episode 827 with rewards -500.0\n",
      "============ 828 ===========\n",
      "Run episode 828 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 829 ===========\n",
      "Run episode 829 with rewards -89.0\n",
      "============ 830 ===========\n",
      "Run episode 830 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 831 ===========\n",
      "Run episode 831 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 832 ===========\n",
      "Run episode 832 with rewards -500.0\n",
      "============ 833 ===========\n",
      "Run episode 833 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 834 ===========\n",
      "Run episode 834 with rewards -452.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 835 ===========\n",
      "Run episode 835 with rewards -500.0\n",
      "============ 836 ===========\n",
      "Run episode 836 with rewards -438.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 837 ===========\n",
      "Run episode 837 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 838 ===========\n",
      "Run episode 838 with rewards -500.0\n",
      "============ 839 ===========\n",
      "Run episode 839 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 840 ===========\n",
      "Run episode 840 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 841 ===========\n",
      "Run episode 841 with rewards -500.0\n",
      "============ 842 ===========\n",
      "Run episode 842 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 843 ===========\n",
      "Run episode 843 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 844 ===========\n",
      "Run episode 844 with rewards -500.0\n",
      "============ 845 ===========\n",
      "Run episode 845 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 846 ===========\n",
      "Run episode 846 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 847 ===========\n",
      "Run episode 847 with rewards -500.0\n",
      "============ 848 ===========\n",
      "Run episode 848 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 849 ===========\n",
      "Run episode 849 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 850 ===========\n",
      "Run episode 850 with rewards -500.0\n",
      "============ 851 ===========\n",
      "Run episode 851 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 852 ===========\n",
      "Run episode 852 with rewards -500.0\n",
      "============ 853 ===========\n",
      "Run episode 853 with rewards -500.0\n",
      "============ 854 ===========\n",
      "Run episode 854 with rewards -500.0\n",
      "============ 855 ===========\n",
      "Run episode 855 with rewards -193.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 856 ===========\n",
      "Run episode 856 with rewards -500.0\n",
      "============ 857 ===========\n",
      "Run episode 857 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 858 ===========\n",
      "Run episode 858 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 859 ===========\n",
      "Run episode 859 with rewards -500.0\n",
      "============ 860 ===========\n",
      "Run episode 860 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 861 ===========\n",
      "Run episode 861 with rewards -500.0\n",
      "============ 862 ===========\n",
      "Run episode 862 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 863 ===========\n",
      "Run episode 863 with rewards -211.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 864 ===========\n",
      "Run episode 864 with rewards -500.0\n",
      "============ 865 ===========\n",
      "Run episode 865 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 866 ===========\n",
      "Run episode 866 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 867 ===========\n",
      "Run episode 867 with rewards -500.0\n",
      "============ 868 ===========\n",
      "Run episode 868 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 869 ===========\n",
      "Run episode 869 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 870 ===========\n",
      "Run episode 870 with rewards -98.0\n",
      "============ 871 ===========\n",
      "Run episode 871 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 872 ===========\n",
      "Run episode 872 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 873 ===========\n",
      "Run episode 873 with rewards -500.0\n",
      "============ 874 ===========\n",
      "Run episode 874 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 875 ===========\n",
      "Run episode 875 with rewards -500.0\n",
      "============ 876 ===========\n",
      "Run episode 876 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 877 ===========\n",
      "Run episode 877 with rewards -500.0\n",
      "============ 878 ===========\n",
      "Run episode 878 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 879 ===========\n",
      "Run episode 879 with rewards -278.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 880 ===========\n",
      "Run episode 880 with rewards -500.0\n",
      "============ 881 ===========\n",
      "Run episode 881 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 882 ===========\n",
      "Run episode 882 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 883 ===========\n",
      "Run episode 883 with rewards -500.0\n",
      "============ 884 ===========\n",
      "Run episode 884 with rewards -483.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 885 ===========\n",
      "Run episode 885 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 886 ===========\n",
      "Run episode 886 with rewards -500.0\n",
      "============ 887 ===========\n",
      "Run episode 887 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 888 ===========\n",
      "Run episode 888 with rewards -500.0\n",
      "============ 889 ===========\n",
      "Run episode 889 with rewards -349.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 890 ===========\n",
      "Run episode 890 with rewards -138.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 891 ===========\n",
      "Run episode 891 with rewards -132.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 892 ===========\n",
      "Run episode 892 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 893 ===========\n",
      "Run episode 893 with rewards -500.0\n",
      "============ 894 ===========\n",
      "Run episode 894 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 895 ===========\n",
      "Run episode 895 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 896 ===========\n",
      "Run episode 896 with rewards -500.0\n",
      "============ 897 ===========\n",
      "Run episode 897 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 898 ===========\n",
      "Run episode 898 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 899 ===========\n",
      "Run episode 899 with rewards -500.0\n",
      "============ 900 ===========\n",
      "Run episode 900 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 901 ===========\n",
      "Run episode 901 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 902 ===========\n",
      "Run episode 902 with rewards -500.0\n",
      "============ 903 ===========\n",
      "Run episode 903 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 904 ===========\n",
      "Run episode 904 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 905 ===========\n",
      "Run episode 905 with rewards -500.0\n",
      "============ 906 ===========\n",
      "Run episode 906 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 907 ===========\n",
      "Run episode 907 with rewards -500.0\n",
      "============ 908 ===========\n",
      "Run episode 908 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 909 ===========\n",
      "Run episode 909 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 910 ===========\n",
      "Run episode 910 with rewards -500.0\n",
      "============ 911 ===========\n",
      "Run episode 911 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 912 ===========\n",
      "Run episode 912 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 913 ===========\n",
      "Run episode 913 with rewards -500.0\n",
      "============ 914 ===========\n",
      "Run episode 914 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 915 ===========\n",
      "Run episode 915 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 916 ===========\n",
      "Run episode 916 with rewards -500.0\n",
      "============ 917 ===========\n",
      "Run episode 917 with rewards -203.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 918 ===========\n",
      "Run episode 918 with rewards -500.0\n",
      "============ 919 ===========\n",
      "Run episode 919 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 920 ===========\n",
      "Run episode 920 with rewards -254.0\n",
      "============ 921 ===========\n",
      "Run episode 921 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 922 ===========\n",
      "Run episode 922 with rewards -500.0\n",
      "============ 923 ===========\n",
      "Run episode 923 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 924 ===========\n",
      "Run episode 924 with rewards -500.0\n",
      "============ 925 ===========\n",
      "Run episode 925 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 926 ===========\n",
      "Run episode 926 with rewards -500.0\n",
      "============ 927 ===========\n",
      "Run episode 927 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 928 ===========\n",
      "Run episode 928 with rewards -500.0\n",
      "============ 929 ===========\n",
      "Run episode 929 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 930 ===========\n",
      "Run episode 930 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 931 ===========\n",
      "Run episode 931 with rewards -500.0\n",
      "============ 932 ===========\n",
      "Run episode 932 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 933 ===========\n",
      "Run episode 933 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 934 ===========\n",
      "Run episode 934 with rewards -500.0\n",
      "============ 935 ===========\n",
      "Run episode 935 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 936 ===========\n",
      "Run episode 936 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 937 ===========\n",
      "Run episode 937 with rewards -500.0\n",
      "============ 938 ===========\n",
      "Run episode 938 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 939 ===========\n",
      "Run episode 939 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 940 ===========\n",
      "Run episode 940 with rewards -500.0\n",
      "============ 941 ===========\n",
      "Run episode 941 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 942 ===========\n",
      "Run episode 942 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 943 ===========\n",
      "Run episode 943 with rewards -500.0\n",
      "============ 944 ===========\n",
      "Run episode 944 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 945 ===========\n",
      "Run episode 945 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 946 ===========\n",
      "Run episode 946 with rewards -500.0\n",
      "============ 947 ===========\n",
      "Run episode 947 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 948 ===========\n",
      "Run episode 948 with rewards -500.0\n",
      "============ 949 ===========\n",
      "Run episode 949 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 950 ===========\n",
      "Run episode 950 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 951 ===========\n",
      "Run episode 951 with rewards -500.0\n",
      "============ 952 ===========\n",
      "Run episode 952 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 953 ===========\n",
      "Run episode 953 with rewards -500.0\n",
      "============ 954 ===========\n",
      "Run episode 954 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 955 ===========\n",
      "Run episode 955 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 956 ===========\n",
      "Run episode 956 with rewards -500.0\n",
      "============ 957 ===========\n",
      "Run episode 957 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 958 ===========\n",
      "Run episode 958 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 959 ===========\n",
      "Run episode 959 with rewards -500.0\n",
      "============ 960 ===========\n",
      "Run episode 960 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 961 ===========\n",
      "Run episode 961 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 962 ===========\n",
      "Run episode 962 with rewards -500.0\n",
      "============ 963 ===========\n",
      "Run episode 963 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 964 ===========\n",
      "Run episode 964 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 965 ===========\n",
      "Run episode 965 with rewards -500.0\n",
      "============ 966 ===========\n",
      "Run episode 966 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 967 ===========\n",
      "Run episode 967 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 968 ===========\n",
      "Run episode 968 with rewards -500.0\n",
      "============ 969 ===========\n",
      "Run episode 969 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 970 ===========\n",
      "Run episode 970 with rewards -500.0\n",
      "============ 971 ===========\n",
      "Run episode 971 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 972 ===========\n",
      "Run episode 972 with rewards -500.0\n",
      "============ 973 ===========\n",
      "Run episode 973 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 974 ===========\n",
      "Run episode 974 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 975 ===========\n",
      "Run episode 975 with rewards -500.0\n",
      "============ 976 ===========\n",
      "Run episode 976 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 977 ===========\n",
      "Run episode 977 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 978 ===========\n",
      "Run episode 978 with rewards -500.0\n",
      "============ 979 ===========\n",
      "Run episode 979 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 980 ===========\n",
      "Run episode 980 with rewards -353.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 981 ===========\n",
      "Run episode 981 with rewards -500.0\n",
      "============ 982 ===========\n",
      "Run episode 982 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 983 ===========\n",
      "Run episode 983 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 984 ===========\n",
      "Run episode 984 with rewards -500.0\n",
      "============ 985 ===========\n",
      "Run episode 985 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 986 ===========\n",
      "Run episode 986 with rewards -403.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 987 ===========\n",
      "Run episode 987 with rewards -500.0\n",
      "============ 988 ===========\n",
      "Run episode 988 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 989 ===========\n",
      "Run episode 989 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 990 ===========\n",
      "Run episode 990 with rewards -220.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 991 ===========\n",
      "Run episode 991 with rewards -500.0\n",
      "============ 992 ===========\n",
      "Run episode 992 with rewards -500.0\n",
      "============ 993 ===========\n",
      "Run episode 993 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 994 ===========\n",
      "Run episode 994 with rewards -118.0\n",
      "============ 995 ===========\n",
      "Run episode 995 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 996 ===========\n",
      "Run episode 996 with rewards -500.0\n",
      "============ 997 ===========\n",
      "Run episode 997 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 998 ===========\n",
      "Run episode 998 with rewards -500.0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:3: RuntimeWarning: overflow encountered in exp\n",
      "  exps = np.exp(all_values)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: overflow encountered in exp\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n",
      "/tmp/ipykernel_4394/4286230475.py:4: RuntimeWarning: invalid value encountered in divide\n",
      "  res = np.exp(all_values[np.arange(values.shape[0]), action]) / np.sum(exps, axis=1)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "============ 999 ===========\n",
      "Run episode 999 with rewards -398.0\n",
      "\n",
      "Done\n"
     ]
    }
   ],
   "source": [
    "rewards = train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "3d20e8e0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----------------------\n",
      "   +8.7846 if w1>=-0.09081626087427139 & w2<=0.2299093216657639\n",
      "   +9.3353 if cos2>=0.8940913081169128\n",
      "  +10.4909 if sin2<=0.44979619383812\n",
      "   +8.2458 if sin2<=0.37771036624908455 & w1>=-0.12610410898923854 & w2<=0.15699735283851654\n",
      "  +10.6541 if \n",
      "   +6.8474 if sin1<=0.24219990968704252 & w2<=0.04877336323261269\n",
      "   +1.2850 if cos2>=0.9911528468132019 & sin1<=-0.01331976410001516\n",
      "   +1.7239 if sin1<=-0.014723045006394384 & w2<=0.028690411895513537\n",
      "   +3.5813 if cos1>=0.8881555199623108 & sin1>=-0.28723066449165346 & w1<=0.7724048852920534\n",
      "   +1.3335 if cos1>=0.48252303600311286 & w1<=1.78991277217865 & w1>=-1.704880380630493\n",
      "   +1.0000 if cos2<=0.9513723731040955 & sin1>=0.017190182954072954\n",
      "   +1.0000 if \n",
      "-----------------------\n",
      "   -3.0189 if cos2<=0.9931913375854492\n",
      "   -3.3464 if cos1<=0.9904826760292054 & w2<=0.12676677107810977\n",
      "   -8.6917 if cos1>=0.8178704619407654 & w1<=0.9226453423500064\n",
      "   -5.4697 if \n",
      "   -9.3263 if w1<=0.21755264103412641 & w2>=-0.10354194939136505\n",
      "   -5.5746 if sin2<=0.37771036624908455 & w1<=1.2867938518524171 & w2>=0.15699735283851654\n",
      "   +5.4709 if sin1>=-0.2664733499288558 & sin2<=0.44499904513359073 & w1>=-1.0710395932197567 & w2<=0.04877336323261269\n",
      "   -8.2010 if sin1<=0.04399238973855973\n",
      "   +1.5784 if cos1>=0.9934104084968567\n",
      "   -9.7897 if sin1<=0.1551332503557205 & sin1>=-0.7060805082321167 & sin2>=-0.17948884963989245 & w1<=1.78991277217865 & w1>=-1.704880380630493\n",
      "   -1.2848 if cos1>=0.996736490726471\n",
      "   +1.0000 if cos2<=0.9935046672821045 & sin1<=-0.21224188804626462 & w1>=0.11238409280777001 & w2<=-0.22888346612453456\n",
      "-----------------------\n",
      "   +3.5980 if cos1>=0.996890127658844\n",
      "   +2.5385 if cos1>=0.9904826760292054\n",
      "  +16.7525 if \n",
      "   -3.7130 if cos1<=0.9999975085258483 & sin2<=-0.017886706441640855\n",
      "   -1.9920 if sin2<=-0.0018978121224790757\n",
      "  +12.0937 if w1<=-0.021795783191919316 & w2>=0.049939500540494924\n",
      "   +9.6534 if w2>=0.049939500540494924\n",
      "   +1.1957 if cos2>=0.24061650335788728 & w1>=-0.3275569915771483 & w2<=1.6480346202850342\n",
      "   -3.6820 if w1>=-0.12540029883384704\n",
      "  +15.8530 if cos1>=0.8881555199623108 & w2>=-0.4170232594013214\n",
      "  +10.0707 if cos1<=0.996736490726471 & cos2>=0.9245671033859253\n",
      "   +1.6685 if cos2<=0.9935046672821045 & sin1<=-0.21224188804626462 & w1>=0.11238409280777001 & w2<=-0.22888346612453456\n"
     ]
    }
   ],
   "source": [
    "for i in range(action_space):\n",
    "    print('-----------------------')\n",
    "    print(actor.best_model[i].rules_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "0b443f87",
   "metadata": {},
   "outputs": [],
   "source": [
    "def test(num_test_episodes=100):\n",
    "    \"\"\"\n",
    "    Test the learned policy using the trained actor model.\n",
    "\n",
    "    Args:\n",
    "        num_test_episodes (int): Number of episodes to test the model.\n",
    "\n",
    "    Returns:\n",
    "        float: Average reward over the test episodes.\n",
    "    \"\"\"\n",
    "    total_reward = 0\n",
    "\n",
    "    for episode in range(num_test_episodes):\n",
    "        state = env.reset()[0]  # Reset the environment and get the initial state\n",
    "        episode_reward = 0\n",
    "\n",
    "        for t in range(1, 10000):  # Limit the number of time steps\n",
    "            # Convert state to tensor and predict action probabilities\n",
    "            #             state_tensor = torch.tensor(state, dtype=torch.float32).to(device)\n",
    "            action_probs = actor.predict(pd.DataFrame(np.array([state]), columns=column_names))\n",
    "            # Select action based on the highest probability\n",
    "            action = np.argmax(action_probs)\n",
    "            # Take the chosen action\n",
    "            next_state, reward, done, trunc, _ = env.step(action)\n",
    "            # Accumulate reward\n",
    "            episode_reward += reward\n",
    "            if done or trunc:\n",
    "                break\n",
    "            # Update state\n",
    "            state = next_state\n",
    "        total_reward += episode_reward\n",
    "        print(f\"Test Episode {episode + 1}, Reward: {episode_reward}\")\n",
    "    avg_reward = total_reward / num_test_episodes\n",
    "    print(f\"\\nAverage Reward over {num_test_episodes} Test Episodes: {avg_reward}\")\n",
    "    return avg_reward"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "ef9e3e26",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test Episode 1, Reward: -82.0\n",
      "Test Episode 2, Reward: -84.0\n",
      "Test Episode 3, Reward: -73.0\n",
      "Test Episode 4, Reward: -63.0\n",
      "Test Episode 5, Reward: -82.0\n",
      "Test Episode 6, Reward: -78.0\n",
      "Test Episode 7, Reward: -90.0\n",
      "Test Episode 8, Reward: -72.0\n",
      "Test Episode 9, Reward: -74.0\n",
      "Test Episode 10, Reward: -92.0\n",
      "Test Episode 11, Reward: -81.0\n",
      "Test Episode 12, Reward: -81.0\n",
      "Test Episode 13, Reward: -96.0\n",
      "Test Episode 14, Reward: -92.0\n",
      "Test Episode 15, Reward: -73.0\n",
      "Test Episode 16, Reward: -82.0\n",
      "Test Episode 17, Reward: -126.0\n",
      "Test Episode 18, Reward: -73.0\n",
      "Test Episode 19, Reward: -104.0\n",
      "Test Episode 20, Reward: -64.0\n",
      "Test Episode 21, Reward: -72.0\n",
      "Test Episode 22, Reward: -65.0\n",
      "Test Episode 23, Reward: -80.0\n",
      "Test Episode 24, Reward: -80.0\n",
      "Test Episode 25, Reward: -93.0\n",
      "Test Episode 26, Reward: -82.0\n",
      "Test Episode 27, Reward: -83.0\n",
      "Test Episode 28, Reward: -65.0\n",
      "Test Episode 29, Reward: -86.0\n",
      "Test Episode 30, Reward: -73.0\n",
      "Test Episode 31, Reward: -75.0\n",
      "Test Episode 32, Reward: -66.0\n",
      "Test Episode 33, Reward: -73.0\n",
      "Test Episode 34, Reward: -82.0\n",
      "Test Episode 35, Reward: -93.0\n",
      "Test Episode 36, Reward: -94.0\n",
      "Test Episode 37, Reward: -92.0\n",
      "Test Episode 38, Reward: -72.0\n",
      "Test Episode 39, Reward: -78.0\n",
      "Test Episode 40, Reward: -113.0\n",
      "Test Episode 41, Reward: -65.0\n",
      "Test Episode 42, Reward: -88.0\n",
      "Test Episode 43, Reward: -73.0\n",
      "Test Episode 44, Reward: -88.0\n",
      "Test Episode 45, Reward: -163.0\n",
      "Test Episode 46, Reward: -73.0\n",
      "Test Episode 47, Reward: -73.0\n",
      "Test Episode 48, Reward: -72.0\n",
      "Test Episode 49, Reward: -75.0\n",
      "Test Episode 50, Reward: -81.0\n",
      "Test Episode 51, Reward: -73.0\n",
      "Test Episode 52, Reward: -88.0\n",
      "Test Episode 53, Reward: -73.0\n",
      "Test Episode 54, Reward: -82.0\n",
      "Test Episode 55, Reward: -73.0\n",
      "Test Episode 56, Reward: -82.0\n",
      "Test Episode 57, Reward: -64.0\n",
      "Test Episode 58, Reward: -64.0\n",
      "Test Episode 59, Reward: -82.0\n",
      "Test Episode 60, Reward: -73.0\n",
      "Test Episode 61, Reward: -78.0\n",
      "Test Episode 62, Reward: -73.0\n",
      "Test Episode 63, Reward: -73.0\n",
      "Test Episode 64, Reward: -73.0\n",
      "Test Episode 65, Reward: -75.0\n",
      "Test Episode 66, Reward: -74.0\n",
      "Test Episode 67, Reward: -80.0\n",
      "Test Episode 68, Reward: -78.0\n",
      "Test Episode 69, Reward: -96.0\n",
      "Test Episode 70, Reward: -73.0\n",
      "Test Episode 71, Reward: -104.0\n",
      "Test Episode 72, Reward: -72.0\n",
      "Test Episode 73, Reward: -73.0\n",
      "Test Episode 74, Reward: -74.0\n",
      "Test Episode 75, Reward: -72.0\n",
      "Test Episode 76, Reward: -94.0\n",
      "Test Episode 77, Reward: -73.0\n",
      "Test Episode 78, Reward: -94.0\n",
      "Test Episode 79, Reward: -93.0\n",
      "Test Episode 80, Reward: -74.0\n",
      "Test Episode 81, Reward: -220.0\n",
      "Test Episode 82, Reward: -72.0\n",
      "Test Episode 83, Reward: -80.0\n",
      "Test Episode 84, Reward: -106.0\n",
      "Test Episode 85, Reward: -73.0\n",
      "Test Episode 86, Reward: -73.0\n",
      "Test Episode 87, Reward: -82.0\n",
      "Test Episode 88, Reward: -78.0\n",
      "Test Episode 89, Reward: -74.0\n",
      "Test Episode 90, Reward: -93.0\n",
      "Test Episode 91, Reward: -72.0\n",
      "Test Episode 92, Reward: -73.0\n",
      "Test Episode 93, Reward: -89.0\n",
      "Test Episode 94, Reward: -96.0\n",
      "Test Episode 95, Reward: -72.0\n",
      "Test Episode 96, Reward: -100.0\n",
      "Test Episode 97, Reward: -73.0\n",
      "Test Episode 98, Reward: -74.0\n",
      "Test Episode 99, Reward: -82.0\n",
      "Test Episode 100, Reward: -63.0\n",
      "\n",
      "Average Reward over 100 Test Episodes: -82.25\n"
     ]
    }
   ],
   "source": [
    "avg_test_reward = test(num_test_episodes=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "31340922",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x7f5345d533a0>]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD4CAYAAAAEhuazAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAABVyElEQVR4nO29edwdRZU+/py+912yEgJhMSwJGJGIisgXZMQdWR1QnHHEDXFhGHHGcZwZgyiO8gPcQTYHGHGMOCwqmxCWEELYCQmB7HvI+iZ5s777+97bfX5/9FbdXd23b9++a9fz+QTu20vV6e6qU6eec+oUMTMUFBQUFLIFrd4CKCgoKCjUHkr5KygoKGQQSvkrKCgoZBBK+SsoKChkEEr5KygoKGQQ+XoLEAcHH3wwT5kypd5iKCgoKDQVFi5cuIuZJ8nONYXynzJlChYsWFBvMRQUFBSaCkS0Meycon0UFBQUMgil/BUUFBQyCKX8FRQUFDIIpfwVFBQUMgil/BUUFBQyCKX8FRQUFDIIpfwVFBQUMgil/BUaHnv6RzBrSVe9xVBQaCko5V8hirpRbxFaHv/4hwX4xh9fw87eoXqLoqDQMlDKvwI8tqQLb73yMby+eV+9RWlpbNk7CAAo6mrjIQWFtKCUfwV4cvkOAMAnb3mhzpIoKFSOnb1D6B0q1FsMhRpBKX8FBQUAwCnXzMHHfjmv3mIo1AhK+SsoKDjY2TtcbxEUagSl/BUUFBQyiKZI6aygoFA9MDMM5UvPHJTlr6CQccx8aSOO/d6seouRClbv6MXvXthQbzGaAsryV1DIOO5ftLXeIqSG8258DgWdccn7p9ZblIaHsvyriBVdPfjq/76KkaJaCKYQjaVb92OooNdbjKZHQa0FiY2KlD8R/T0RLSMig4hO9p27gojWEtEqIjpLOH62dWwtEc2opP5Gx3f/shhzVu7Eiq6eeouiUEes2dGL066bg1198kiaHT1D+MRNz+PKB5bWWDKFLKNSy38pgAsBPCseJKLpAD4L4B0AzgZwKxHliCgH4BYA5wCYDuAi61oFhZbFHc+tR9f+IcxZsUN63l5Y9frmvbUUS8HC7OU70J3BENeKlD8zr2DmVZJTFwC4h5mHmXkDgLUATrH+rWXm9cw8AuAe61qFGmNP/wj+8HLo3s5NgSkzHsW1s1aUvO7xpduxantvDSRSSIKFG/fihbW76lL3UEHH12cuwBd/+0pd6q8nqsX5TwawWfh7i3Us7HgARHQpES0gogXd3d1VEjO7+NY9i/CDB5di5fbmpqRuf3Y9jBJxipfdtRBn3fBs5DW1wJPLdmDe6sZvyws37qlpfZ/+zYv4/P+UVr5/eHlj6hSqwWbb2bh7INVymwEllT8RPUVESyX/qmqxM/PtzHwyM588adKkalaVSewdGAEAFIrN7yCbt6bxFSoAzFm5ExffOb/eYpTES+t211sEKX7w4FKc8+vn6i1Gy6BkqCczn5Gg3K0AjhT+PsI6hojjCgqJoKsIDwWFslEt2udhAJ8log4imgpgGoD5AF4FMI2IphJRO0yn8MNVkkFBQUEhEpxhu6HSUM9PEdEWAKcBeJSIngAAZl4G4D4AywE8DuByZtaZuQjgmwCeALACwH3WtQoKCgqpgbOs1WOiohW+zPwAgAdCzl0D4BrJ8VkAWmMtuYKCQlODqN4S1A9qha+CgoJCBqGUv4KCQsuhFVifjbv7q1q+Uv4KCgoKDYanlu/Ah37+DB5f2lW1OpTyV1BQaDk0u+G/3FrMtmxb9RZhKuVfBlZ09WDKjEexKYOrAeuBV9bvxpu7qjv1VVDIKpTyLwN/WrAFAPDk8u1VKV83GL99foNK7WvhH25/GR/+xTP1FkNBoSWhlH8D4S+vbcHVjyzHzU+vrbcoCgpNDRXnXxpK+TcQBoaLANwUvwoKrY77X9tS1/qzPEYo5a+goFA3PLemOqmc09Dpf31jG254anUKJTUmlPJXUFDILKIGiX++exFueGpNzWSpNZTyV1BQaDlkmc6JC6X8FRQUFDIIpfwVFDKGv/vNi5gy49F6i9EQyHJUUEVZPRXSwbzV3diyVy0cU6gNFmxs/Y3iuenX+FYfyvJvAFx853xc+cDSeovRUjAMxr/d+zpe29T6iq7ZUNQNLN26v95iAGj+NBCVQCl/hZbE/sEC7l+0FV/531erUv6zq7tR1I2qlF1LzHzpTbyxeV9N6/zl7NX4xE3PY+X26uWtyTCbExtK+SsolIlnV3fjS3fOx63PrKu3KBXjqodqv5Heki2m1d/dO1zzuv3I8iChlL+CQpnYaSmtN6ucb11BoZpQyj8hBkd0jLTAtF9BoVHwq9mrVWqTGkJF+yTE8Vc9Xm8RMgdxhp7lEL1WxY1z1mBX3zCu/dQ7a1dphpuRsvyrhMeWdGHxlsaIaGh1ZLj/thzSSmeubIPSUMq/SvinP77m/L7glhewZkdvHaVpDdTb2l+7sxcf/eUz2DcwUlc5sohqffssrwdQyr9GuHmuytGfJkRdQDWq85a567C+ux9zV+2suKz9gwVMu3IWnq9SVksFhVJQyl9BoQ5Ytm0/Cjrj5rmtmzUyTZRr+GfZoo8LpfwV8NDrWzFlxqMNn2JCVACqayukgSz7BpTyV8CDi7YCAFYrv4RCg6JcHZ1lpR4XSvkrNCXq7fxVaA1kuRUp5a/Q9MhyB1ZQSAql/BUUFBoKe/tHcPwPHsf8DXucY+XO9OJeneUZpFL+Ck2DMIdvrUI9FWqD+W/uwWBBxx3Pra+3KC0NpfwVFBQCWL6tB1NmPIrn1nTXvO6+oSIAYGyHm32mfIdvdi36uFDKvwmwo2cIBZVEzhO7rfp2dTF/w24AwFPLd9S87v6RoPKvFrLcjJTyb3AMFXSceu0czPjLknqLoqBQE/Ralv8Y0fIve5GXQiko5d/gGC6YFv/s5dvrLEljQa3gbF30DZvKf1xnDSz/DDcjpfwTYEWXWgxVD4R11Az335bE4IiZ2bOzLVdnSVobSvmXAdva/MtrW+osSevgvVfPxvceSJ/SavUB4aSrZ+PKKry3RoJmhXFt3z+Eh9/YVta9cS36sBlkFhzGSvkr1BW7+0fwf69sAgB8/8ElmDLj0dBrvZu5uL+zGOq5p38Ef7TeW7Pi5qfXBtJj3//a1sB1F93xMv79T2+kUufSrfvxwKLSxlsGdH9lyp+Ifk5EK4loMRE9QEQThHNXENFaIlpFRGcJx8+2jq0lohmV1F9rUCbVTG3QtX8Qd72cvjLLQiduVryyYQ/mriodSrp132D5hYd890/c9Dy+fW/pgSQLzaZSy382gBOY+V0AVgO4AgCIaDqAzwJ4B4CzAdxKRDkiygG4BcA5AKYDuMi6ViHjOO26p1MtLwvTdoUUkOFmUpHyZ+Ynmblo/fkygCOs3xcAuIeZh5l5A4C1AE6x/q1l5vXMPALgHutahRLIcBt1kEShk5qsNR3837kenzALxkOanP9XADxm/Z4MYLNwbot1LOx4AER0KREtIKIF3d21X2WoUD+87crHsLNnqKIyWr/rKkQhbihw2FVZaD8llT8RPUVESyX/LhCuuRJAEcAf0xKMmW9n5pOZ+eRJkyalVWzTIksG7Ihu4Pm10dsbZsAwUwDwyOJtGC6WXt0+7cpZ+MUTq1KrN077+vrMBTjvxudSq7PWKLmKgpnPiDpPRF8G8AkAH2N3rrQVwJHCZUdYxxBxPPNQ+/xGoxx930yDQzPJWmvc/my85G7mlphu/6n0ncaZOcyuQ+qLNFFptM/ZAP4TwPnMLO4B+DCAzxJRBxFNBTANwHwArwKYRkRTiagdplP44UpkaCXs6hspfRGAom7g6keWY1ffcJUlalyoFb4KaSDLA2+lnP/NAMYBmE1ErxPRfwMAMy8DcB+A5QAeB3A5M+uWc/ibAJ4AsALAfda1CmVg7qpu/Pb5DbjqoaX1FqWmKKejygaHRt2msln0z3WPrcDza6LpuGbCyu09MEIaVRYGhYqSZzDzWyPOXQPgGsnxWQBmVVJvq6NUu9MN84qinoEWGoIknfPM65/F09/5EI6ZNDZ9gTKA2+atx23z1uPNn5xXb1FKolTzWLp1Pz5x0/P43KlH1USeRoRa4avQmgjp/XGptVbFwo176y1CQ2CbtXBs8ZZ90vNZsPyV8m9AZCmypzy0Zo+spaL59G9erF1ldUTcOP3wZIGt2dZEKOVfZyzcuKf0RQoBlOqard91FVoVhsEYKuhVrydzyn9HzxAMo3FUw9Mrd0aez4IFEhetOhVX39gLSmFZdqVvtJ5t7brHVuDWZ9ZVvZ5MKf9Nuwdw6rVzcOszKp6+2VFqWt8KA8XK7T2ZSDNQTzTiHhF/WliblPGZUv7b9ptOnmebKFxN9f1s4vk1u3D2Dc81fdrmZkUWBt1MKX+F5gaH/JZf2zydV6ZnNuzuBwCs6OqpsTT1RxqKN/5mLuUdbyUo5d/gyEIjrBRR7ygDBpyCQiJkRvkv2rQXjy9t3k3QlQ4rc4VvE72wZpK1WVDpzC8L36SiFb7NhE/dWnl8cz2ohGpzj81Ej4iIu41jFrhbheQIbR8ZaDaZsfybCTJuu14Lv372+Eq8tG53nWpPDvb9v5HRrANwKyML30Qp/zKQxT18b31mHS664+V6iwHA1yHr2DezPpnY2z+Cv73peWzeM1D64nqh0pTOGfjGSvk3IMQhptqNsOUbeas/Xx3wyJIuLNm6H/89r/oLkRSqB6X8Q7Bw4168vnlfvcVQEMAew7/UIq/m0fpNJGrTIO4rzTDlnx2Hb7mwE2DVO31ttbnHVmjksme45Hev1lyOVkJku8jAaNVMxkNSZNPyb/3vmnms2dkHQH1qhWTIQrvJpvJvJkS0wjQaaDNZOI8t6XJ+xw31VEiGyHeaQuK1aiP+Ct/s7uSVTeXf+G1XQYIbn26MhHxNoPtaAllQwPVENpV/QtRlkVfEuWbRQbrBeHxpV6qzjPgOvcbXIE0gYkND9o3j9tU0NnNZ190X+9pGglL+GUct9M4dz63HZXe9hkcWd5W+uEFx89Nr8MCirfUWI1Oo65hYRuUf++W86slRRSjlXwbqscirFazCLmu/1N19w3WWJDl+8eTqqpWdhdWk1YSsj1Tab7LwRZTyb0DIGl61GmOzDi7iVD8yq2f1RVGoEmpB2YWmdM5Aw1Fx/g0OZRV68fAb27B/sIBzTzis3qJkCo3sO2lcyRobSvk3IGpLLjVX1/mXuxcBAM4RlH90Vs8qC5QCmkHGRobc4Zv8XvP+1v8oivZpcCjFkC4a2YKVoVGkrcdrq2adJXeCa5QXX0Uoy7/BUe+Uzo2KVuqcjf4oCzfuRU5r3BYo9ZHFbCBZ3sZRKf+Mo5WUqAz+6Ts12QqtRpDWznNVa1STein1XptthpgEivYpA624k1ct0TpPki5a6RungXLfhnp9yaCUf8bRqP2mVIcWB2K1gXtropJvF/vesBW+GWg3Svk3OOrZCNO2SBuBwlBofJTb7NKakTMz5m/Yk0pZzQCl/BsQGTA6SqLkO4i7gXsKslQbzSBjPVDphizl1nHXK5vwmdtewuNLtyvLX8GLVtzDN6qRt8SsIwOduBqo52Yu9Yqx39DdDwDYsndAxfkrZBuN3PwbWbZykQUrsxzY76PW7yULCl9ENpV/E33j6m/g3kQvQ6Ex0AThsrE3cwlb4culr2l2ZFP5NzgapWvVs9GnVXPWrLlWADv/rywnfzl1AV5aNwutJpvKv1G0awxEdYBUtnFMeK7e8Fhm9RMjJTTeEzRRF5Gi1MBROr2DEErceJ8nFWRT+TchWrT9VR3+jpvGbKZVlUGjoFzOP8nsrvQ6ktZHRcqfiK4mosVE9DoRPUlEb7GOExHdSERrrfMnCfdcTERrrH8XV/oArY6oRlpt66yuSq6MRV5Nb6VGRVzVToyGw6JNe7F4y/6S1yVrpxz73lb9BpVa/j9n5ncx84kAHgFwlXX8HADTrH+XAvgNABDRRAA/BHAqgFMA/JCIDqxQhvKR8Gu2In8crXha73mbLbdPNmG2u6dW7ExeQoVNNwuzu4qUPzP3CH+OgatWLwAwk028DGACER0O4CwAs5l5DzPvBTAbwNmVyNDq2D9YqLcIdUFJzraFOqf9KH3DRQwVdM85NVSVRnWagsj5t1BjE1Ax509E1xDRZgCfh2v5TwawWbhsi3Us7HhtkbBH1WOR1wW3vGDVXR1EOpSbpM03q9Pajx09wzj318/VW4y6I412Fzdff1j7b5a2XwlKKn8ieoqIlkr+XQAAzHwlMx8J4I8AvpmWYER0KREtIKIF3d3daRXbFMhAuyuJanW+Rrfi1u/qL3lNoz9DrZHkfZQcHJKJ0lQomc+fmc+IWdYfAcyCyelvBXCkcO4I69hWAB/2HX8mpN7bAdwOACeffHIWvkXZSOWlNOmbFcWO6vzNoCiVw9eLVCz/uJu5KIdvMhDRNOHPCwCstH4/DOBLVtTP+wDsZ+YuAE8AOJOIDrQcvWdax2qLVv2aKSNtvekvLlJpp1u1g4Zx+FayMKnF22+5gQbVeB2t/o6Bynfy+gkRHQfAALARwGXW8VkAzgWwFsAAgEsAgJn3ENHVAF61rvsxM2cnh6oPz6/ZhVc37E18f4Oosbqg0m36KkHa40fUs2TxG5ed0jnBRy5rv4gWHQgqUv7M/OmQ4wzg8pBzdwK4s5J6K0YdetT+wQJGigba8+5k6wu/fUUQyQCDUGu3crSzNN1W73+uLCfurETmuj9vE2jDuBKGPUoTPGLFUCt8a4RnVnXj8v97TXquAyPY0PkFfDv/lxpLFY36pnSuX90K9UXZnz6J5V9GKHErrncBlPIvC5U2gtnLd0iPj8EQAOCLuSfLlKdy1FXBV3Ivy39HXddIqCQ3Ud2d2I3iN4lASVqnQdtFLaGUfwMgrB3Wu5NXu/Zoh2+Vlu60QK+vxRNE1pHgHW7Y1Y/VO3rj1V0u558kt08ZZbZAk5GiUoevQhVRqtFVP7dPi7b6BkAWlIuIj/ziGQDAmz85r76C+ODJ3pmxb6KUfwNAC1tlWIO668lnRjqbyxIr/sUNE+oZE7Inq7tiqvI7LDvUU3p55S+pAyN4K20FjCKAXMXlNRqySfskbBfVisPRYEiPGx6rpPaot46JQt0VYIWop/y3tN2AlzoqWIzf7C8f8lmtZzMXBmbk78ajHVcit+B/ailazaAs/wZAzqf87WZp1KCT1dNZmtbK1nLkbEQqK8rSlZkblc7WzsvNr+j+aqN8zj95GVGXHUBmqg0abM2lSNlU/ika8O0o4HDabba2hNNhv/K3UfMNrBtPL6aORnnERpAj8UBYbdqnTLkqGdDDQjoZDLJz/uutmVk3m7RPirih7RbM6/g3YPlDicsgMhtZuYugUgn1rHYFkcUnn3Z4O2qTowYrWs37yvCNJKsiFZSSckfPEN537Rys7+5LXkeMUFDHKFPKv/Vwye/m4/I/yhdexcXRZMXuD+xOXEaY5S/SPrXojJmw/BvkGeuZnsKVoYqFV4BScj26uAvbe4Yw86WN5vWyMhLU69/A3QnE0EcSlNb4yKbyt77p3FXdeHRJVxm3RVmqcgUeB37lbzfBUg246qGeKasef2mVKJ9GVVxJUKtHKfXORmEI383fjbd2z66NQCEo2e6thl9N/w0zg5TlrxALFTTEONE+1YI3ztl/rurVh0KsutQgV5bDt0FIorhSVHOA98tworYO/5T/K85ecUUVay2NUkrdbxzJLo+buC2qPTiWv6GUf+ugGj0qRcs/hSIbEklfe9i0fhL24rr8HThsaxlZwRtD91eExJx/ifNh7bCW+HzuKRw2vCHyGnutRjVnjgz3fZCifRQiUQXlX8ryT9vh67e40taT5ZR31UPLSl5zmrYcF+XnYvrinyQXql4Qo0waZEAKm4EGUEWBr2m7E9/f+JXIaxzaJ8J6j5u4LSqrpxPtYxQjy2pWKOVfBiIXeVWg/MM6Xb11QrVj4ist3h40NSO+ZVbvd1ou5LOeZE8RGNx9f8dW/qmD0YlhxP06Du3j8D7VkcmmfZTl30qoRmOpSPnboZ62JWOiFpy/xwKtclWBUNYKamTm0LQYjQaZ0eCPKS8HadM+YzGAT2gvxX+fKcf5X5f/H6zsvAQf1xaWVX8lX7/UuKFCPRXiow60T7VRT9pHRJiq0aj8d16PV1rJIFcLh+9P2u7Aze034XjaFPPGdF/iu7T1AISw6RLwW/7S2VEKImpK+bcgauTwPV97EW92fg5z2r+DKRQeUho63S7RgNN+DLHDDI7oGCroKdcQXl/kdSHHqEksfxkqffY06zyCdgEAxtJgwpLTQSfi0SvuxKOKoZ4Q2peifRQiIVH+X83PAgAcq3XhXyN26Qq3/IXiK5MuFGFW6fFXPY7Tfzo31brSHqyS0D6NEuopol4TPDbMdsfWl4k9mKZM+9j1d1A8C9um0aKctqVX8HLkdR7apwYO3yVb9uP62aurXo8IpfzTgqQVxe3TOfJ3QhO1p31qW19FnC0nVP4NovvjyiFN7JbwIQIDn+Gd2YnvM7KGlF+iPeh0wFT+RY5WS+4ir+R1lnpSgxmanXalBpb/3978PH49Z03V6xGRTeVfB4cvR9i9SaN9Ugn1bBBlmAT2e2viRwBQPfnXd/d58t/4vzWz16Ktd5x/h0X76CXUkrvIq/RCrXLg38zFmQm12oIbC9nM6pkQskbm0o+yBhJvehxK+xg1tsRrrEUrDSVNwvk3ykCRRqRKKXz0l/MAROyeZVn+dnmxQz2rRftYlr9eYuMUv+UvpX1KvaWSp9ntlzXsGLVMOZ5N5Z9i241rHXwq9wLeMI4FEOyIdqfzK7NabOPYvJZ/84R6VoJqPiEHaJ/6LvKyOf/Sln8aoZ4yQ47EC9z3wdUNfBBRy/6oaJ8KUc7U8L/aZkqPh2/jGBqFHEe0slHtducvv5z6ztAW4t/y92EUhpxjSWiKRtnMxZNTqez89SkJ4Vj+ptKrP+1jKn+jlFnjt/wll8RJ2Rx1HUPolzWkfWrZOrNp+acI1zqoRpy/+7u5dp5NH//ddj3yZOBV4+14zniXxckmUP4pyFLL8SPN7x6QO8LhG4kqbebi0j7lcf5JIGNU/Zx/Gn27XNTSOGl5y3/bvkHs6BkqfWEMiNPC99AafEBbnIp1YDeyNngdcGHRPlEWT7kQy6h2uyt3sxoReSsiShwo60X7xJF77c4+9AyFhy5yyO94ApR7g32b3+ObkPZJAePRj+c7/gXvprWOVKMxDCAO5+8k9zH/l6Dhus7i8PNuVs8a0j7W/99KW/Bm5+dwcH/1IoBa3vL/m588XZVyH+j4IQBgnXG4eUCi/KMifETYCm0MDaMDI+7S84gpaTMiIHeCBxH9IomUfwovL47Fecav5mH64eMx61sfqLzCBPXHKkf3Ghu1HExP1lbhCNqFb+Xvd46dllsOABjgjsh74+53EQWWDBz+DdzrQfvYBt85mrnP8nG7nwLwiarU1fKWvxQpzlrTmBqekXN3ExstcNq1mAJ68/k3/rAihnc2suUPAMu7egCE5PaJafqn+YR+ucnXZu24dqC2NKO/rrU8Gbe1/Qoz267DJOzF6doSzGq/AsfSVvN632YuSbpJnDBqqku0j11/CosZSqDlLX8pUnyftgJiDrqp4lYj0j2iQrN5yaNoBwYwyS238XW0FGkkdhPfTzLOv/KXV24JZp2M7+T/hAf00yuvP6Xvb6cqdhcXJjNgbCVMCX0Bfp/XZNqFt2ubAQDT9U34+9w8TNc24njahHU8GT7WJ0SmkkIHD3n6nhDqWQdHeFzWoBJk0/JPEfbq3LRSOntXWTI6MIJnO76Na3Bz4L5q5vapBtIo3h1sxWm5/NpptAXHxU1WVg4SPMgk7Mc/5x/E0x3/Dsz7eexCUnX4Bg54ueyk0T7/8efFmHrFrLLucTPYUiDE2WsMGYF7/OkdkqDkrWL7qiXn75+dVbGubCr/hG9UZjVG0T5xR29R4WtCoJthAHmYDe+DWBQpR1LUcxIh67zH00Ys7PhHHIT90nvicv5tKGJ2x3/iiY4ZPiotubxOGQneWg6CAtm1SlrejPzduFB71ndOVn8yBPL5+zj/vKj8o16U79yfF25JKJFN33n7Tl54VzmZ8vdZ/kkWecmCJrwbuLNbX01DPf2O6Or10Gwq/xTRZjfUivbwlSs0g90GmEf1k0tVeyCIMxR+Pf8oDqJefFBbLD3vcv7szrokkovW4ygrigRISfknKCMvpp82dGkZl+X/il+1/3dywcoFe9NjiEq3lmaBfxDPky6cc9+bf2ZSiU+s1L2exG41DfW0f5WZbC8BlPKvEE6HqcjyN6S/xb/bhI6ZaqhnDU1/f1Wyqks1dhnnL7snF6E0kuKttAVPt/8bxhg9Zd/rsfx9WSKjvkG6id18ZfvoDNHAiLR2U47z93+/9hAfmD3Yk28zlyQzMdmsITzOv/ahnrVw+GZT+af4Pu1OzSns5AV4NyjxOp1cVKs5NMrq1yhIZ0mSd08hyr+SJ/xG/mEco23He4fml32v5zsaekPEVTF7V/jmJRRLyI0V1y0OH36DRxwoNRiOJG4aFFuO5CLG2cDdiX6qYb+oZSbfbCr/hJCF7LVFWv7xEObwNTia104HtWtswUVesoiLaKvSVuqiw1dmpcosxkoRl4eVPZdX+VdG4SXn/H0HDH9WT1fpVpvnFh2+AdonhPO3fwc2cE/wQkopWXOb0PrRPrZ0ivZJGynOWp2GaqST3kH8bYgNUICoXD76i2fw7h89mbjuZoDYT2WWvyxEMRdCpVU2u4mXUExWRd5H+3johigLNr5wpeErzJ/YrY3Ev6tD+0yZ8WiApvEr/zZEc/6BzVwk9cR9b6GLvES5akj7uAFs1Q/1zGacf4pwOkwFnL84uou/PU4n6X3A+l398QRtAJTL+XdgxMzzwkGFL+b2KWX5i4q3EmXqfs8SVqPkmIfzr1CZpMUMUCCxmyhj9WifQDgjeQ/kPco/2DdS2cylJC3EkbSijRvabsZ7aQ1gnANoldvS/mgfZfk3A9Li/AW7yNMAq4S4FmitwSCs6vwy7m2/GiQqA5JQZBLBvSGhKdE+VpEUgzLwIx/g/OORSOnG+XtrYt8gJFrcSRd8xYEYxQZQwMARZyAiZZcTKD9ApH2SOHx9SpaiHL7h7+KTuRdxpNYNGOls8h5Y4VtFpKL8ieg7RMREdLD1NxHRjUS0logWE9FJwrUXE9Ea69/FadRfNqqh5OIq/8G9gUOyBg5YnH9KfHUjIElit/dqazyWvcsVc2zax+PwTeXbRxciyxgZFe1Tfu3JH+LD2iJBDv8ir5icf4XRPt4Im+jBWUb7xOH644RyOgJIyhLliuX/SGkhWEDqRo72IaIjAZwJQFxKeQ6Aada/SwH8xrp2IoAfAjgVwCkAfkhEB1YqQ60Q2eniKv837gkcCrNSOSzaJ4LrLBfs+V3lWUbg7/D6xHcSlskzyjIT32M+Lcs/Zvid/7lIsG4LnAtw/uY1ZciY8DMxAx/VXncPBEI9Y8b5h2abjSeY/7Ko2W0HCniftsK6zm/5l5Q0XAbJsYoSu6XkF3DyFVmyvL55Lz53x8uplO1HGpb/9QD+E973eQGAmWziZQATiOhwAGcBmM3Me5h5L4DZAM5OQYbyUIUZVexQz5EgRy9bxQiER/s0QwK2NBEWDRW2Axrgp4fEb5P83cXm/ANWpLsgbQR5SZy/fJAvXVN5YARDTkW0VRjtE3fX0eDgGH7jP+SewSQyV3u7lr9VTtT4VEqGUpw/yoz2ScnyN3wDGwEYKlTH4VyR8ieiCwBsZeY3fKcmA9gs/L3FOhZ2XFb2pUS0gIgWdHd3VyJmEDWifaS8nWTK75/aOiomJNrHRtrbONY6n3+kcSlcLSoi0fqLHeqZEu3j3BqXUhBgW9XDaDMtf9/5chaiVfKZNInylzl8Ix2NIbRPXMvf4Og4fxGTaF/gOvZxNmmFegY5f+vvOIo9pXDQl9fvxr6BAsQVvkkT5pVCyWgfInoKwGGSU1cC+B5Myid1MPPtAG4HgJNPPrnxTd24LVAPOobC0jsELLUyq2o0JBU7bGbkcv5svhShk3iVf1p8bMxQT8kVOUf5twfCghl+ysVFujt5+WYYrENUAaKjNdLyD6N9ypDDTdMRjPMPg/86V4xKZnPhx6PaV/CGdJT/P9+9yKrfVf5alXy/JZU/M58hO05E7wQwFcAb1sh0BIDXiOgUAFsBHClcfoR1bCuAD/uOP5NA7rpCys/G5fwkUQEaDBRZQ54MjxVk1CDaR4Tdmdbu7K1NfTGvy4Us9/d8BzYAygnXpb/C1603GnLL36J9OG9x/uw7L28/MnkrGfxz5Hc8uyog53+fZSLu6lQ/nRnVxsV3HYz2CUecUM5gXeQ5730fJZR/FTN/VsvyT0z7MPMSZj6Emacw8xSYFM5JzLwdwMMAvmRF/bwPwH5m7gLwBIAziehAy9F7pnWsqSBtrHF7pKSRaGAUra3rvMo/2vJPQ5EZzHhuzS7Pse/8SZ5UrVKU04RvaL/V+R2W+8jzHfwbk4iWf9orfEsoRv93ER2+Nu3juZ7DlX+aCMwk7fQObKd3iBnnH1Z+bM4/fDYXBX/kW0Wbudh9KNR5LTEuIgtM9/u5nH/1gj6rtchrFoBzAawFMADgEgBg5j1EdDWAV63rfszMe6okQzgSvk1yuFGZ5R+T8w+hfQrIoxMFL+1TgvNPA//3yiYUDTvCoLrwlx+30+aEjiXN5w9IlH/YCt94dcoRr+H4LWAGC7RPUPkD5XL+yR7Cv2iQffRTvkZx/kHaJ8qvFRHqGV1L2We9ay/MWbfOhBxxgCILIGXL30v71InzjwvL+rd/M4DLQ667E8CdadWbCBVqOamlEneaHEb7WJZ/jtxEVp6FJmJVcQWNgU17BlIsrToYA1dGD+dP4cpfVHLph3qWuC6K9kEbYAxKzvuUR5UcOxoMDHMeHVQEReT2iWzPIcooLu3jH4SiBr42SZ4f12q3/h+r1qAMUffaxkUBOZN2rLnlbyv/VBYOS6FW+CaArLHm1jwe6LD2dNrGELeFWP6u8veGeobF+aerGN5Lq7Cq40uYvPWxVMv1I+k2jqfxIqEMkSsOn5aHrp1IxTlYShEEDzmWP7cB7M/nz54c9iLkKZ2jqw8Xi5GHYQ5AQGCF73hyB6XIVcwRVEkcGMwOhcOIpn1ku3qlwvlL7vZw/gZDI4Zu9cvSyj/dmZJ3B7MG4/yzDL81PsRmZyo19RtCe0iop8j5+8PNqhvqCQDHaF3ooCIO634+pRLliKMbZIpApM8c2gccqfyrscI3LvsasICZ8R5tLYBw2ud92nJfXd7/pwUNBgq2QnNCPSVIYMmWY/nHdfiKyj8Y5x/k/E/TluHB9u/jbU98HhjcFyFrCRmtuoq+dxVeYNrK3400qhLro5R/Evgb65/0D5k/fAqoD52evwfREW75s0X7xIj2SUMhHF18E/e2/xg3jfwAR5Dp8J26+UFgU3VWE8oQVxHLFAWzmVfFQe/20Hu+n78LD7RfhW/kHkwsqxfl8cnH9i/Cl/KzAQA9GGPF+QsO6ZFe/KztDm8ZUYZ3WbJ6b8zBQMFie+1wzj0YBwCYWfw4ri58AQBwYN/asuuPK9chxS78VHheb5CDV9O1CzOiL+WexLX5/8Fh2+f66nVrfr+2FCdq6zG+60Vg97pwIZzpg4HH2mdgkfYFfHirsIuapewLsS3/dGkfEpR/tTj/bCr/Cvfw9Vvj/RhlXeA9Pow2bDYmOX8PcnsI589OI/t9+09x5lZzs3aD04tU8WN6cTlO1VbiJGMp3k1CJ9m5PPymOsGb28e1/nRRUWx+xXOP+I2maDvwHm0tPpl7oaKB0+FhS0X7+DR3p9EHAPjWyDewm8cHrEhNd7eZXGxM9ZxLewP3PHSMWDNVtgwRAmO1MRlXFS/Bi8Y7AAAHDGwMLyfkJcYdzC8bdBU/w3KoWtAovJB20vG5/NM4bt2dofVFBQF4ZLX+n4eO47VN6KQCDhtcE7g3Nu2TssPXbr+H0Z6qZfbMpvJP+C47iz34af523Nh2s+d4P3dY5QbDDQfQ4fw9jHanw4nIwXAbGYDTd94N7NsM+KkNn/yVNAmxQXn2B9bKjwG45HfzcdVDS2PU6UVc+c+AOxtxLX+TKntI/xvpPbL31l7hPshi+F0U/JSCZmmplXyUuX6b2aO4vCuY3ecLlaMC7kqDYaaYEOrVwDAsVbCeDzfPJXCSx5Wrj0Y7v+132cUTY9djyy1L8OZV/uEK2cnQKnLrQkF2HfWw/I+m7fhG/mEAwBm5RfhQf3Wi4bOp/BNicv9y/EP+GXwwt8RzfNhyoMmUvyGovBHkAT2ogIhczt/Boj9UdScvTZDVs4lHAsfV3FXdmPlSuKXoFO3/2+psl+Uexhdys0P9G+/Aeue3y/nbvhKrCfutacl766BCRYrTtvy1knH+/joM67+aqWR9ikK8Ps73TuzwZSAHdpQ/Cv1Onfaz6fb7TDD4xJWrD2Oc3+/VTGt7gDvCLg/AUf6S+uLG5stm8bJ7bTpWXpYgQIqW/8y2n2ACuTnADtR3p1a2iGwq/4RzaS1kIY7bYYKx5iy84iJyoaGeBb/yN/RgtI/htXgqgdjoRadaLbesA8wON6PtHvx/bb/D9W23lrzeiRJh21diKTL/u5fQBx0YqUhWMQfOh7Q3cOL+p6Upuv2fR3OUP1nK3wi1/EVaS1JUxdDIMFNMABjz3DU4lrbirNwC59nsGUAUtVUp599LrvI/xMrd85hxSsy74Xxrf8gnUAbtY1v+EmvfvNXs68UIyz/uLKNcdJBXR1RrrU82lX9CaGEWj0T5H0HdODO30NMhisjFon3MsvRgtI8VJZJGpGeY8mdDx7Z9wTj0NCAJgsFYDDl/T6WukmWQoBZzxJEDrx+V0j42zik8hd+3/xRf2voj4OXfBM77aR9bwTDIon0MS54COo0Bj+z+WPY0YYZ66ujlUZinvwsA8O38nwEAm9n0Tbkz1fIVTtxon3003vk9bA3eb7IsfVgIONwI0mJa4zIKz7uLXulon0pXRIehwF7qVSn/hkDwA99WPM/tMEIn/kbuIQDAVj4YfWxG/RQ5zPLnoOXPRjDax3dvJc5AL+fvNuKX1naju3dYdkvFkHWPccICrnEYiHymImsu7WNp2DDLTEr7oJBKqOdRvNU9KEnRHUxZLFr+pvLPFXqxoOMy/O/2C3HsyttC5U43sZs5uBjQMMd4DwBgFEawl8fissK/emqMtPwrdPgabKodkepZYRyNB/T345bi+SXvdzb0cXxfIm0Wk/axbvGkmeDgvdGWv2iYpWf5i7qgwLnUEhP60fLK/5WOb+DJ9v/wHkyoAPxc73dGLsN1xc8Lyt8tuIMK6OHRuKzwbXx8+Of4wPD1ZkOSWP4EdrlFR0YjsBISxWGP+JXYGmLDFS3itTv2V1Bq+RhHpvLfzeMwjsJnHDqbitOhUKzOFsZRyxbHtZGeekieTOP5D9mWv8n5m8q/Y2Svs6hqTL+7D5K7Uxk8/y9RZWxoMKDbvgfAmQmI9KQBDZpRxMW5J/CB7TNxGLycczjtE1cwV7HaK3gH0IFvFy7HXP3E0rc7lr+LG9puxvnaC15nfKTyt0MpRWd7UJlHKX+v5Z+m8nctfwOa1JBJAy2v/A+lfXibtrX0hTHgj4CwFY8hoR4IjL08FgY0dOEgbOZDzRFd4vDNCSt8HTAjsJDp8RmpPAcQTvtUc8NomaPwnNx8AOYM6WDqwd9o8qghAxpY7AglOqf/W9nx42JYZbmQp/UIdnr/VaLlzxbnLy4KIg6uYi2F07Ul+Kfcw8CbL8S63pYrZyl/exaTh+62X/s6IhzcuwI/avs9zui6Df+QeyZe+TGbjuZY1Zrjm7ENKFGWojVD6OcO/Kn4Qee4Y6ELnP8ncy/ixvZbPIpywQZvwkLAbOvT6U2MGzbXhYTRPnbh9bH8XeWvQysr71M5aHnlL0WMufSlub/iH3N/9d5mNYDn9BOwm8dhHb8FgEsH+Llbw1dREfkIh68vxNLQYRjwhpUO9ZjVpKCfRUeXGO0T5tSuBpiBz+eeAgAsN44GAEyiHum1NmUi7uELlKZ9CtaMashycpKe3OmbgxFYhCTr9IaP9HdpCnLuFxW+nWPHQ2tFRduA8ZO2O/DdtnuAx/6z7GfQkXMMlzzpgXbK0JAz3PfURsGdx4Bgip/Y7dL6Vj3sOn4DhhRcJTiATjxovB8AsJUPEpziwQoPJdcBf8vcNVi2zTuT/Xb+z5jV8T18440LMRE9Xj+eaLzFsPwPFttqipa/aAjq0BTnnypiNNLvtd2NK9ru9h01P8J/FS/Ge4dvwxI+xjoaVP6axa2KMKN9JKGeEByXjoyG40DbxeOxHFOdBpbTB/GHtmvx/S2X4WPawtIPI4F3j1t5vvzagHBf8UP4q3Fa5FW2s9S1/P2crH8zcnuRjvleBy3lf9yaOyKX/UchJxukY0RH2UaDQ/vAp/zZprBysd9/BywjoozBzM5Rr9u+B8gtf5AGjYNpFb6cexw/z/83puycY5XnK7+E7Kdpy/DLtludtvdPjp8BTh4ssR/YA7YODS8Y78S0oZlYZExzKpYNNmflFji/NRjoH/a2iwnodc79W/5P+Pfc/7mPXWao5xV59940Lf8RD+0TnfW0EmRK+RMMXJJ7DJ/pnQl0ryr7fk3oxCJktI9M+Rckyv8t2IXxNIhNfIi3MovzZwD36h82o4Gs8scMbMEHcktx9MganB5Ck5R8lhDOv1QMeyWQKQuy4s438qHO8WOH/hC4146Rd5N7+Tn/EMvf6kirjSPRx51427rfAeueTiQ/yUJyZbSPn/OHS23YbUIT2oH9u4AcyA5RZcbp2hK8Y988Z8Ynlu98vzKUjn2fEaB9vCa8Aa/yt+X/9/x9+Pv8s3jnxt9Lyy+VL+fu9mvw6dzzyFllbxAifOz3IvYZWwna37iAvDn7s/chEJ5LBmkCRmGA+kJ+Di7UnnX+1sRBJYbDt60G0T7VpH2qlc+/4XASrcaX80/g/NxLQD+AVzoBSDcpA7M8hZdtGfitdLnlz4FOVeSgw3ea5Y+wKSRXCAN2FsaivbOv3dEle9qWi7BoH3+I36HYg7fQbtNaHjUhUV025NsbWhy0QKeIYa8GEzRiiy93LX8yoi0z+/nsb/UKvx3fG/kq5nZ8Rzr7iiP9UbQz8E2ltI9PEXg5f8vyF+4jiwoUp/gT+tbhrvbrgI0A5g8CH/QGLTgKoUy6wX7fusWnt0k5f80zONl12e2E2EAeReQNr/8k7gI628ck1uty/u77tQduOzrIPu/SaJJAfwEyH010uhSXUrRDPf1J8ESIO/ClSftsscJu/6/4EXw8t7BqVGxmLP+L80/iPO1lLDOORh+NjZwue9rSgt85P0ULznN9qPKXWf4Fqw6zrE5r4dHrxlt9QuhgQzfTyrLF0bLb+WwktQr8Dt8RS4n6Lf+HO76PBzp+CPzla4nqiYJoiQZoLwt25zOst+9uAhLN+R9Hmz3nDRbqSDBF/1puFk7PLUMvRntPyKJ9fH+L0T62DKJlbf8uCrRPmy7ss+ALJzXfg634yvv+edKhI+e0zZxM+YOktI/97scOdWFxx9fxoyUfA15zZ2lxjd88m21e7Ee2DGI7MCRUkL1I7kLtWVzSexuway2Y5YO5jEKLu3GMTStG5fbxBGqkTPvs5nH4XvHrMJTDt3LkoGMDH47zRq7DoDY6fgrWWf/u/HSXlfumyRzG+Uscvrp3oZa96tTmNx2wAdZtpWCnBQh2+CSW/ydueg67e93FVe2km7IJZeegYzz6MAGW4hnaV3Y9Tvn6AL6Tvw+nrLkB2O9GXtlKLEr5252LQchDxxdzsy0xox1yx2mm8t/FBwAQYuyBRFaa7Ui8fORb3hNS2serdLwrfG0ZSnD+Yrk+xeLZ4a3MVMIaDOjs+h5ktA9Ic2QCXKPHVkKjC3swmiyrf896Qa54Mtg0o1ephyh63zEGgdjAD9tm4m8HHwIW3+uRVYRMaUYqf3HlsBCRZB6UlSWkylh8T2i55cJeiwHYs0EV6lkR7OkuYFsP0av/1hiTzT/IfUXu9F3O+Q8Xipgy41HcPX8TNIkTt4icM8W3P+cospQ/B5W/TU8UbUvNsC1/cXej8hvG0q09gU5QhAaDyekAv2v7GRZ3XuouNa/Asjmq7w38c/5BnLh5JrDCG0Flf5eA09GC7lj+Grr4IOSI8Vj7DBz/7OXWeXmcvwHCOuNw7OVxzt8OfZDgWXIw0MOjsdhy8rsVyWgf799iu7GVnCZEfdltogjNudaj0EKsTvNceZy//323oRhop37LfywGcRxt8mTfdCDQQ3Hj/PNs9wHR8g+Getpyea5jk/YRHd7+7ShtyJRmlBXtWWPBtnFhG0US2ocMbLUoGqx+MrTccqH5dJWW9toUp56MIC+MpqIileHY782SOtRsh1AY598zaCryG55aLQ31FB2+ftpnyE4O5xRqOCGAzjSdJco/YcpnDezEUZvPYFrfdtmnaiu9N1TQAHNiWgVB6dnRJ9GWv/3NCK8YxwMAjtc2YcLOV6zzcsvfv3aCxToSOLVz0C2l6Y9vlJXl4/ydRUnkLKYiT7uyLH92LX/PalP/hu9I5vAFXJrNfg5z29BgqKeo/P8+/yye6AhZYyLIWcrh61xXGLZmy6Kfx/3Orhzy2QBxEZ2OURK+xaIsM2mU5Z/jAj6qvYapO+c4tFuUwzcHA7t5HO4onpsoG26oHILy17l6oZ6Zcfh2YCS25Q8ITlDWTZOJSIgvlnP+4rZ4oaGeutfyt5X/IHxZDdnwLGTSLa4T8Cr/pA3D3jrS3lvWUWyOBe3ryRVY/h7LxafICN7QQz9sy8sASQcI5x375MvDjGe3n0IXIm2SDGTmYKIhsEik7GifiDh/kd/1KH+Zs7F8zt8NIBAtf0mcP3kt/0gIssV1+LZTQTLbCFI8sth/A4S87lKW0AuBb1DgHNpID7H8w2WcNLQRd7b/AlgGLJ/89wDEgAL5LMIxXBIFEcihkeHMUlWcfwoYTwOuFUnhlr+9QMdjUTtKV+7wdTMhug0kTPmTUfA0pFEWdzrst/zZ8ESBsBDtk4bD1x+z7oZSWnWkqfzFskRlAbcDhdE+tsPXY7kLcCgMSWI3He7up57ZRYIt93KS7+l/HhvhK3xF2kfO+U+gfmD3Oh/t4+f8K6N9RDnyUtrHG+0TXaj3e8ZBOwqSPmTRPsJs9Cja6Tln1kHI60IaEKMQ+Aa2tS6bFYf1F//ivbZin6essGgf3W5XKVIzfopaKf8KMQF9HtrHv3m1jaKl/GVLt8lSjGEN1x/tE+D82W1IzMBbaQveTeswzG0Qc6vYMf22U7OAvLeBCQ0xqfL3LyxzG7FFO/hvqKBxk6hIxE7EZmZOGe3jRHqwSwfILX8y/TIS2sfvKHRpn2SWv5Saklje/lBPjUXL3zYUxDh/c5DfjfHmgXu/4KN9fPIyuymrkzh8hcFWnt6hDJ45geXfgUJg9ixz+Nr+sHnGu51jOjRnnYB5oOCh0ABXYctCPcM2qfF/W3HtBYBQ2sfpNyla/h7aRyn/yjGB+oTRNBdqyeqW8s97lL/5Ye0OGcb560JHlHGpjvPIKIDBuLbtt/hI7g1s5YO85VHOVFCGGxUhWrehyahiwO6gbry32+ncBTQcHFQqsvzFWVRQWZjRJ37r04QTqgkK5j+CzaOTRPnrHtpHVLyJHL6kQ/cl3xMX3nlkCugckfO3N4QJWv43Fz+JZcbR5qKuCMvfe64c2kd0+JpytEH3WNvmdVoZtI/g8I3Q/afQCud3u2S24RpmQfrvfv104Tr/OotiwJBzwmnBgQEpzFjyr9zWrHDUqFBPO1LN6ZspLfQSZ5mmw1cp/7IhfvgJ1O9z+Bo4CPtxmrYM2LPBuU5nieVvO1qdaIBgdIR5mXsPgR2r1YZjRehmauHRGMYL+jtw7sh1nutM5W+48d9sL/KylL/oLCwz2kd3ZjZCw4U53TadaQbaUQxuhlKB5S/bk8D87c6kZLHmgDfUU0a7GKyBKaiE/Za6Z3aRkuVvSOoFJJy/JM7fng0ZIMHKzGOJMdXz7c2LfHVEDQwln0MW518B5x+T9rG3JQSAdioGvqVsha8Nz6I//3m9EEg/bfczMxeT9/Iw5R+0/AuessISuxnQPDP6NKAs/xTg74QOhUAm7XN92624u/0a4M6z3Gt0b0yzeYOt/N14bRFOTK6PjgnG+duNpGhdo6MPozDkc/bqlLccvi7n73FSV8D5i4ObqHTdxGkG3k6bgjcm4MlteBWZED/O7krP8Ggf0yLz0DaiWCAwSSx/8qYx8NBGCUM9/fUPGTnMW7U9KFPECl/X4Wvz/Hn3PJOp6AyfQvMreFH+stI7cMDybw9L7CZ8m0gI7SJqM5d2uFFe79NWxKJ9bBSFY4G190bBCfV0Z7HWgkUYQQouVPl7Z3V2Yruw3FGA3SYqCySQQQz11AU/XNpobeXv+9sTPWDoOMDeJ1NI9FU07EVOhtOYXIdvmPK3LH+fRS6N9gEcy78NejBXDCyL0oiI9qmA9rH7qu2TsOP4HR8FG3iXtl5yY3JOMzRsUaDR/O/0ReMdAIBtwsbeocpf8FXYsJfeO1WlEO0jX7dRuix7duZN72AN7CRatWQqOqNYgvM3z41wrkyHr+tj8UbQ+Dl/IQSTSgQEemi88Mvy5JVzPA14/pZF9jjn2PuOvCeLzkBqUzci568b/nbhDXG24acUc47hFcPyd4yK5H1kInrwudwcnK+9gLwVVgwo2icxDGYMsRtFIy4YgqG7iZmE2HPd4cR1jNgRODbn71hwYZy/P9TTH+dvc/5FMNiZggfktrhkMa2sGO3jDgp5x/KP62yzBzd/NNL1xU9bXK8h3+s2RMnEqVcL4agdiw3BEMovF76L6UN34jVjGgA5jQaYVqrM8s/7ni8q2scwGFc+sASrtveGPoNsq80iclKnYhjtw8JCM/udFH0RV2bab91RaGb4rVz5F5GP5Py/n/8DlnZ8BbjzbOs+ez2Bd6Yls/xtlFT+QpuPsvzz0J0UInKYMtgr3RcJ6U5Exezve3pxBNc+aiY3tFfb2jRMTmL550iSmRVBwyJn5S0aDtkj2i5fR2VpQ2x8IfcUrm37LW5svwUnaBtcNkHRPslgZsWULSQxKRQnlbGwuYZtKXhCIZ0QS5e79dRjl2t4LfLwaB/T8reTtvlhOLSPu8JXltunSHmnYcReYGNb/uTNTb+Yj7HkNdBpTdEf0U+15I4IjY1Rr2fa6qF9zN+yNHoGNAyg0xcFFRLtI1JiTp32piXudWHpHTbvHcAfX9mEr818NfQZcoI1ZqOAnJR2M5hxV9s1eKPja8ATV5prGaxFTQ7tYzvzyavYXMvfUtTUJrH8XaMgytF4orYOY2kI2GxumOOmxNCk/cIpPqby161Fi6MxhA9pb6Bz+2uh1+ahB+hNGUbQhv83dCs+N/I955hX+Xvbyp7efmzfZ84i7P4qOnxltI9M+Rd9x+y1BPZm9zLa01X+4dRQXDjpMgCMw6Di/CuFuROW+/EdfpvMaB9PNkt7ib3ucuLDvqXdpWgfQxc5fw44hl2HbxEMcyoc2L4RLudvc+JuGKZN+7iKoQ06DsJ+cCHepuuu5e+lpYbQbjof2cAoGobO5Mx8RtAW2rD902oZnCgp8qa0ZhYtfzl0j5KSzZIIHCPU0/xG5Mz6pHKGLDSzyysiOJjLYskZwCnaShxAA8Dm+SDhXTtx/s639eZu162FgO47k1j3VmCBk1dG6nRmd4c21s3ZhOHOXHUPJZaM9jFl03FZ/mH8vv2nOPbhT3qCJ0TkYQTzV4WgGxMwiE7n7zDOfwjtIKPoKEd7kHBpHwO679WYRl2wHfm/bZth9idn/Y2M9rEWY6Vh+Yu76XUKi+AMVrRPInjynsPP+Re9+bitlbe2pZCHIdA+pRy+lvIXt+YL4YjNCwtWHXqI5W9yuSRY/gzNsT5Ey/+03HIs7Pwn5G4+KVaomUtreWmpIe6AwWa0TydGrMFAs+rXpJYPED3Vt+GkL6B27yDi34dXVn5Jy1+LDPX0l2MOFMFFU6UgW+RVQF5O+xg62m2OWx8Gsfuu3fQOMsufYln+LGQBNW+UrzIW92kwV8KKocPBvDrOvZ5BM5yq0bU2wDBwAISso0P7pdfmUUQvjwotKwp6iOU/TB3QjKIz+3Itf/P6H7T9EcctuMpTlgaW9jmxjiK1O5a/M2CVCvUEUlP+gNtezYWKyuFbNvwboLsUgGkpepxQtuVvMABzEY2zqYIT5+9ytyIcqy7g8A2J9tELYGZL+UsUGtmcvxvq6aF9nOm768+g3i7INocPlC04fL2WfxvsaB9b+bvWoYR3thBHcdoDsK61Sx2+8t0TEDgnDfWMa/nbFJdkdbce4yHCHL4y2ofF76AXQNA9UUeAG+kk0g0MDUXOA2B31mdZ194KvJZu2EIzj0LRR5x3L0ZBAZJ1K0Iyw2jL35wRtvvrkSAPHcv5aPys8JnQ8sLg39bQxjA6QCwof2sWLVr2B+/w7nEcTvsIyl9rR5thKX874WJEtI8/ii8J8j7l7y5uVJZ/IjAb0IgxbDl97UZip3fwvHArfbJucMCSEKNsdF9CKsBVULowx5QndhMXeZnWqZ9rNOUzp/r2ZuOOFR6gfXz3FodQCjbtQ37LH+0wE7sZ6KSC9bcgf0jDLs/yb/POIIRFbGFwlCXYGSj9CenClL8sokV2bcHPDUhg58H33Cc43EWQuEl8cdgz0Dp8tCEod+FZ7Wd0ok1knL9hR/uEZ5xk+KxJvQBx68u4Dt8iedOOiH6ioiVbOwmDXdG7wYuNPHSMII8/6x+Sno+Cd6W2YLBQBzSj4FC7fs7fvMH7bnIw3PcmQFT+g0bO2agmkvaBN1VGJZx/uy8a6j7rPV1b/DyenPrdxOVGobWVv9XY7YyZbl51M86/DToG7ZHd7myC8h8JOHwl+53C7RBiqCchGOrpNEq9KDh8JaFtlAeYnVjjYcsq9+f2CSp/ecfzyGq14ZwvvUMReTAIE7Afh2M3hrjd6WgsvIOArMyYkb8bc9u/DTx0ufQah8IIsfyjOf+gAvfTAAy58hcdm27K4ODCrEIxxgAGIxBtZEf7+COeyG/5C7SPy/m7+XzEZ7H/tr+93PL3hjaG0T6ejdf1ETfaB+WEenoHPHGgMAemIjo8g0yI8icdRc4Hc1jFgHQGB2AAo6AZI06/9od6AkCu6PWF2QkNo+roN1wZo5S/PRt0AzmSW/5+2uchw1zVvJKPQvfY4xKXG4WWVv42DWPzdjlB+dsOX3tjb5syMZW/eZ/D+TsO3yCVY5YX5PxLWf5gc6on5fxhOqQ1wfrwLPKyI5N8HTOs44kQQz39jj4C41RegvfnlnlC2DiK9jGAj2sLMFXbAax4RHqNM+j6FRkL3yMEhmeAMp/Xb7WaoZ7BZfz+3D6AZfn7lOVIHMs/gvYJOL1F6kMftgwBm/P30T4ey18LWv5auOUfRfswTNrHCVUUlL/HWkW05a/7ZqYB5c96cIYhQZsVLZVE+bPve9vop9HI6UNBzl8IougY2QsM7HH+Njl/Waine8+wEB7u0D6SATaQjryChZB5FF1D1AeNgjonDbS08rdzftgf0EP72Ja/HX5m8+sGY5QV5+5a/m6cf1hYImAONtNoC/6f8QYOpN7wUE/dzO0THuppWqc53Y41bpcu8ir6O1Icy58ZH9AWYypthwHCVYWLcW3hIgDATcVPOdddX/w7r1IICSk0mJ3MpCgMBM4DpuVfZM2KspKkd+Dwxi3jpr3RHxogieAR0+ICIu0TXBgVh/aRhe4W2KR9in7lXzTbTy+PAvQRT7ux5dD8i4jgt/zNd1qMCPWMSj1gO3wH7KgZvQDoMS1/cWDwGRgBSsgw0I6Cq7gkbdBcWWwuaBxJoPxFiLL2Yww0fUiw/O1UID6sme38zMHAiDTaxz02IgwOrsM32PbdrJ6VW/7t0DEQEgpbJd3f2vn8XdrHb/mblnUbiubAQHCsNV038FSHuXXjANsDg5vbR0ZROMm6RvrwaPsVaC/qpo80NNqnCNZ1aMSRoZ45wfI3F3nZ0T6Ww9eyGnt4tLliMgbnrxvATW03YQL141H9FMzU3dQWC/htzu/XjGk4xdrQxbNHsZ8GYMZoWB1eHzF9Jzl/kiwrt77f6jZK0z4u5w8hx3lp2seOpPLTPjLLv5Tyd53zfs7fpH0KuoHONuGcRdn0oxPjiiPWjNEb6ulx6Nr1CM5DLQ3aBzr2YDQORJ9l+ctXVMuyeooyiRDvK2quw7cPo0yjSeLwNSlO079VMl1ECbDH8h+FXHFQcPh6252d1x8j7uK9MIev3QYLnPN852jO35eRtgLOvw1FDHAnDqLgQkNSln8C6D7ah1yagQwzeZlj+dubrIz0YyL1YY0xGffrHzDPCZuohEWcAEB+sNvjuIlM7Cbs0hUsz7RORc5ftshLt5xxPfam4jEs/2KxgAnUj1uK5+ObhX/xnBtgN7Z6AJ0C7WMLFmzcOjNGYdhdSV3oD1xDbO2CRb5sqgINEQavX0ILHAvL7eNfwewqf7nlf6b2Ki4sPOrZk9Y9z4HyAFP5Hkvb0PmH8zx0B1kKsJ87Hcs/nPP3O3wty59th2974L076QwiHb5sKRS7fQucvy+Lqn/mJYuwKnLw3euCw9cJ4wx1+Lr+rc+PXIHzh6+WXlcK4uDTC5P2+VTuOQDezJynD/8aHxn5lfnHsKtQczCsiCovxNX/Yp+MCvX0741QabRPv7C2QYRWJcu/IuVPRP9FRFuJ6HXr37nCuSuIaC0RrSKis4TjZ1vH1hJRyN5w6YADnL+tbHKOlexw/naKh6G9AIA79HOxD2Otcy7nH0X75If3+Y77Qz0Fzl/YvSlQnhWRkgtw/j7L3yqvl+MrfxrqAWBubO6fmfTBjcPuF5S/K1iwcXOxgA4qYhcOMA+MBKkfskJaPX4LwOOADINMEfmdwHFCPR3OH1qQm+3bhdvbr8e/Fu4AnvpRQIahoi5d5DVTPxOv8TS0bX0F6NnmPq9F+/RhFGAUoLGbPM0f51/0rfC1ZXYSi1FwgR1bNFMk528YaCPdS/sYcss/sMLXY/nb17iptZ37KA8YRbSLikvid2J4/VsvGO/EYj42cF0ceDh/HgUNBj6XnwsA6LLyQBGALTwJW/hgs80M9zn3aOCQRV72AjHN/c2CMzewNoRhbxgUtd4iLtpID+7mZyFq8WElSMPyv56ZT7T+zQIAIpoO4LMA3gHgbAC3ElGOiHIAbgFwDoDpAC6yrq0K7BWk0lBPS1EOOpaRlWnTSvK2n8cKjhyX849y+OaHzYFjGw6xjodb/ralKDqfntXfacptKfqcbq60DW7g7i7yAkTLvzTtw9bg1sNjAudEZ5wZCy44fAG5hTliWvq7ebx5YCRo+WvWjIkDln8Mh68we3I5f7fzhuXz9ztoozj/tt7N7h/7NgZkGBrRMYH6PTMjwEw+95vi+eYffTsEoQXLH+aG5U4KkMgVvq6yyTu0T5Dz9ycyk9I+VvuyZfDH+RuSgTHsb8A1UsRzRWvRXjuK6LMNEInDlw0D7SRf0FguRLlFY+Ufhn+ANTwZAEBOOnJCMT/GY/lrIbl93GCCnDOIbOOD3Poion2iromLdnGW5kO1LP9qcf4XALiHmYcBbCCitQBOsc6tZeb1AEBE91jXLq+GEDa1Ylv+9svd3lsA8j2ec9t/9wUMUQcOZXNQ2MdjnXK2//7LGKIOHG/s8ca+2/VYxyat+wsAYL0+CW/J7ZRY/mYD23X/f2AQnZgIr+X/9cJ3cHBhP67BTBzTuxzvwHrLQUYYLALIF/Dmj0/A2wxT9p39RUCDM+XeedfXMEDRqygnWJtU7EdQ+fvXL9ghsvagsOmnpwYUtU1f2Mp/2y3nYcQXGz7d2IMiNOzoK2JC3/PY8eMTAACjLFnC9u8Vz5kL3cz318+djqgGE/YM6Ghf/gS6rXIB4CgMwoDmWFN2OfuGdNAbD6F7yfPOtdOspfyvG8fihG2LsUUoBwDAjCm0Dyv5yIBsO/hAAMDOOy9y3v0ka4FQv6Wcjts3z7GM7QHpkO3zAADdAwYcA57dmcCR3c+Y77N3BMf0rsA2QabR1nuzldjWGz8eUGgExljAsfy3//7L6LTXo5SI9tneW8DRAHQm7B00ADLLGYNhDKEdB2DAkq2AY3qX4xhiPMvmblt7n7gO+5+8xScLcDQg9W+VC7H97RjOOxpsDU/GdLwJAGCBxtpT7MCY+Xdh94JZAIAjsBdLcIx7nsdiIvVh2NCc7/Cvhcvx48IX0YMxmABz1rD7rz9E7yM/88gyxcqQavfrrt9ehGEqnb9Ihum0Hc+x+Y39WUcb2eH7TSL6EoAFAL7DzHsBTAbwsnDNFusYAGz2HT9VVigRXQrgUgA46qijEgmWb+/EvPz7MRunIDfqYPwFH8O4wTy2TT4fi/Yb6BkGHpvwKYw1JqHTcOmKBf0noHfiCRjboeHF4jnoNExrdg+mYk7f0XjHW8Zjzc4+vGvyAViwcS86Dn87ni+eh9FGL9b1dWDdkX8L2vsXvMwfwwHDbTCY8emTjkDvwKGY13UBxumm9b1meBp2HHg6OndpGCoYmHLoQVi1ox3LD78Q43tN5bxaOxYH9bdjz6FnY0HPfmjQsQfAg30HoveID6Bj1xg81P5JdGqHY7TRF3gHMrwxMB3alPdh/DbGIeM7sXF3P9pzGtryGu6f+I9YvcscAOe/5Yu4a5eOFYeci0/23Ysc5Jzm6uFpeOzAT2NU8VF0cDDHkPnepuCgw46E0fec59ySoelYYRyPcVoetx30XSzePxrjOvMgAAeOacfQmL/BywNrcNee48FHnoqn9n8cD+XOxmdyc9HTP4g329+Klw75DKYPeJOybden4q/7TsPJxx6CQtcByB95CsbtAJ6b9FkcN7goIOMzfaOw6Yhzofc8ANlWf9txHPaMPxfjtuXx+0NnYGjHapx49MHo7R+NZ3PnY6y+z3P9goF34tXDPo0JfQcghyKe7DsGpx1zEN4y6jS82H02Oo0BLOkbj91HfQyT9o/Gmr5OdEw8AsNjDsYrQ6vQxsN4ue8Q4C0nYULP6IA8iwen45mDL8TE4QOQZ/mq2g0jx+KRUZ/CGD7EacMb8U705E9Ge3EM5uUuQPvwHrzEH8P44TwOHd+JN3f3Y/Fhn0Zb30S81Hco6PAT0bPrSTww5jM4tXc2lh18Di7svxev9k10ZCvqjEeKH4E+9jgcUtgilWXTyFQsH/UBtPdoOGR8B3oGzRnCLyb/GjveXIYTJo/Hpt0D6Bkq4ozjD8FQwcAvcCN6Ni/HoeM7cPzh47Gyqxd9B30Ur/ZuxUaaDDro45i/eyOW9I5Fx/hJ6J34UbzcswX3DZ+GyWNGoXeogGcmfR5vE773dn0KHjXORH7UFGzaN4yeyR/EkV1PYMWRn8SknrGY23cEjjv8QGjagTjv6InoHRzG3K4LcUBxV+CZNhem4unCqRh72BS8VDhL2vbjotuYikeH349No07G8/oJmJhvR0E38OHjDsHHpx+WuNwoUKmUvET0FABZ7VfCVPC7YNJ6VwM4nJm/QkQ3A3iZme+yyvgtgMes+85m5q9Zx78I4FRm/maUDCeffDIvWLAg/lMpKCgoKICIFjLzybJzJS1/Zj4jZiV3ALBX+WwFIM6Rj7COIeK4goKCgkKNUGm0z+HCn58CsNT6/TCAzxJRBxFNBTANwHwArwKYRkRTiagdplP44UpkUFBQUFAoH5Vy/j8johNh0j5vAvhHAGDmZUR0H0xHbhHA5WwttyWibwJ4AqZ75U5mXlahDAoKCgoKZaIk598IUJy/goKCQvmI4vxbe4WvgoKCgoIUSvkrKCgoZBBK+SsoKChkEEr5KygoKGQQTeHwJaJuAMGkK/FxMMzFaFmCeubWR9aeF1DPXC6OZuZJshNNofwrBREtCPN4tyrUM7c+sva8gHrmNKFoHwUFBYUMQil/BQUFhQwiK8r/9noLUAeoZ259ZO15AfXMqSETnL+CgoKCghdZsfwVFBQUFAQo5a+goKCQQbS08q/lZvG1BBEdSURziWg5ES0jom9ZxycS0WwiWmP9/0DrOBHRjdZ7WExEJ9X3CZLD2gt6ERE9Yv09lYhesZ7tXitVOKx04vdax18hoil1FTwhiGgCEf2ZiFYS0QoiOq3VvzMRfdtq10uJ6G4i6my170xEdxLRTiJaKhwr+7sS0cXW9WuI6OJyZGhZ5V/rzeJrjCLMLTOnA3gfgMutZ5sBYA4zTwMwx/obMN/BNOvfpQB+U3uRU8O3AKwQ/v4pgOuZ+a0A9gL4qnX8qwD2Wsevt65rRvwawOPM/HYA74b57C37nYloMoB/AXAyM58AM/X7Z9F63/l/AZztO1bWdyWiiQB+CHMr3FMA/NAeMGKBmVvyH4DTADwh/H0FgCvqLVeVnvUhAB8HsArmVpoAcDiAVdbv2wBcJFzvXNdM/2Du/DYHwEdh7hpHMFc+5v3fHOaeEadZv/PWdVTvZyjzeQ8AsMEvdyt/Z5h7fW8GMNH6bo8AOKsVvzOAKQCWJv2uAC4CcJtw3HNdqX8ta/nDbUQ2xE3kWwbWNPc9AF4BcCgzd1mntgM41PrdKu/iBgD/CTg7rB8EYB8z2zvLi8/lPLN1fr91fTNhKoBuAL+zqK7/IaIxaOHvzMxbAfwCwCYAXTC/20K09ne2Ue53reh7t7Lyb3kQ0VgAfwHwr8zcI55j0xRomTheIvoEgJ3MvLDestQQeQAnAfgNM78HQD9cKgBAS37nAwFcAHPgewuAMQjSIy2PWnzXVlb+UZvINz2IqA2m4v8jM99vHd5h76ts/X+ndbwV3sX7AZxPRG8CuAcm9fNrABOIyN6OVHwu55mt8wcA2F1LgVPAFgBbmPkV6+8/wxwMWvk7nwFgAzN3M3MBwP0wv30rf2cb5X7Xir53Kyv/lt0snogIwG8BrGDmXwmnHgZge/wvhukLsI9/yYoaeB+A/cL0sinAzFcw8xHMPAXmt3yamT8PYC6Av7Mu8z+z/S7+zrq+qSxkZt4OYDMRHWcd+hjMfbFb9jvDpHveR0SjrXZuP3PLfmcB5X7XJwCcSUQHWjOmM61j8VBvp0eVHSrnAlgNYB2AK+stT4rPdTrMKeFiAK9b/86FyXXOAbAGwFMAJlrXE8zIp3UAlsCMpKj7c1Tw/B8G8Ij1+xgA8wGsBfAnAB3W8U7r77XW+WPqLXfCZz0RwALrWz8I4MBW/84AfgRgJYClAP4AoKPVvjOAu2H6NAowZ3hfTfJdAXzFeva1AC4pRwaV3kFBQUEhg2hl2kdBQUFBIQRK+SsoKChkEEr5KygoKGQQSvkrKCgoZBBK+SsoKChkEEr5KygoKGQQSvkrKCgoZBD/P6I+BSJBVx57AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "# Generate recent 50 interval average\n",
    "average_reward = []\n",
    "for idx in range(len(rewards)):\n",
    "    avg_list = np.empty(shape=(1,), dtype=int)\n",
    "    if idx < 5:\n",
    "        avg_list = rewards[:idx+1]\n",
    "    else:\n",
    "        avg_list = rewards[idx-4:idx+1]\n",
    "    average_reward.append(np.average(avg_list))\n",
    "plt.plot(rewards)\n",
    "plt.plot(average_reward)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9f18bae5",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44b0354e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58e2db1b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8385f8b3",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb4df6e4",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
