{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NPG primal comparison for finite constrained MDPs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\nSet-up:\\n1) Softmax policy \\n2) Bounded rewards\\n3) Many states and actions\\n4) Regularized (Natural) policy gradient + primal-dual method\\n   (Natural) policy gradient + optimistic primal-dual method\\n   (Natural) policy gradient + primal method\\n\\nReferences:\\n1) NPG primal   \\n   CRPO: A New Approach for Safe Reinforcement Learning with Convergence Guarantee\\n   http://proceedings.mlr.press/v139/xu21a/xu21a.pdf\\n'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'''\n",
    "Set-up:\n",
    "1) Softmax policy \n",
    "2) Bounded rewards\n",
    "3) Many states and actions\n",
    "4) Regularized (Natural) policy gradient + primal-dual method\n",
    "   (Natural) policy gradient + optimistic primal-dual method\n",
    "   (Natural) policy gradient + primal method\n",
    "\n",
    "References:\n",
    "1) NPG primal   \n",
    "   CRPO: A New Approach for Safe Reinforcement Learning with Convergence Guarantee\n",
    "   http://proceedings.mlr.press/v139/xu21a/xu21a.pdf\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "np.set_printoptions(formatter={'float': lambda x: \"{0:0.6f}\".format(x)})\n",
    "%matplotlib inline\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy.optimize import linprog"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Random Seed\n",
    "np.random.seed(10) \n",
    "## Problem Setup\n",
    "gamma = 0.9\n",
    "n, m = 20, 5 # s, a\n",
    "'''\n",
    "Randomly generated probability transition matrix P((s,a) -> s') in R^{|S||A| x |S|}\n",
    "Each row sums up to one\n",
    "'''\n",
    "raw_transition = np.random.uniform(0,1,size=(n*m,n))\n",
    "prob_transition = raw_transition/raw_transition.sum(axis=1,keepdims=1)\n",
    "'''\n",
    "Random positive rewards\n",
    "'''\n",
    "reward = np.random.uniform(0,1,size=(n*m))\n",
    "\n",
    "'''\n",
    "Random utilities between -1 and +1\n",
    "'''\n",
    "utility = np.random.uniform(-1,1,size=(n*m))\n",
    "\n",
    "'''\n",
    "Start state distribution\n",
    "'''\n",
    "rho = np.ones(n)/n\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "License: BSD\n",
    "Author: Mathieu Blondel\n",
    "Implements three algorithms for projecting a vector onto the simplex: sort, pivot and bisection.\n",
    "For details and references, see the following paper:\n",
    "Large-scale Multiclass Support Vector Machine Training via Euclidean Projection onto the Simplex\n",
    "Mathieu Blondel, Akinori Fujino, and Naonori Ueda.\n",
    "ICPR 2014.\n",
    "http://www.mblondel.org/publications/mblondel-icpr2014.pdf\n",
    "\"\"\"\n",
    "\n",
    "def projection_simplex_sort(v, z=1):\n",
    "    n_features = v.shape[0]\n",
    "    u = np.sort(v)[::-1]\n",
    "    cssv = np.cumsum(u) - z\n",
    "    ind = np.arange(n_features) + 1\n",
    "    cond = u - cssv / ind > 0\n",
    "    rho = ind[cond][-1]\n",
    "    theta = cssv[cond][-1] / float(rho)\n",
    "    w = np.maximum(v - theta, 0)\n",
    "    return w\n",
    "\n",
    "\n",
    "def projection_simplex_pivot(v, z=1, random_state=None):\n",
    "    rs = np.random.RandomState(random_state)\n",
    "    n_features = len(v)\n",
    "    U = np.arange(n_features)\n",
    "    s = 0\n",
    "    rho = 0\n",
    "    while len(U) > 0:\n",
    "        G = []\n",
    "        L = []\n",
    "        k = U[rs.randint(0, len(U))]\n",
    "        ds = v[k]\n",
    "        for j in U:\n",
    "            if v[j] >= v[k]:\n",
    "                if j != k:\n",
    "                    ds += v[j]\n",
    "                    G.append(j)\n",
    "            elif v[j] < v[k]:\n",
    "                L.append(j)\n",
    "        drho = len(G) + 1\n",
    "        if s + ds - (rho + drho) * v[k] < z:\n",
    "            s += ds\n",
    "            rho += drho\n",
    "            U = L\n",
    "        else:\n",
    "            U = G\n",
    "    theta = (s - z) / float(rho)\n",
    "    return np.maximum(v - theta, 0)\n",
    "\n",
    "\n",
    "def projection_simplex_bisection(v, z=1, tau=0.0001, max_iter=1000):\n",
    "    func = lambda x: np.sum(np.maximum(v - x, 0)) - z\n",
    "    lower = np.min(v) - z / len(v)\n",
    "    upper = np.max(v)\n",
    "\n",
    "    for it in range(max_iter):\n",
    "        midpoint = (upper + lower) / 2.0\n",
    "        value = func(midpoint)\n",
    "\n",
    "        if abs(value) <= tau:\n",
    "            break\n",
    "\n",
    "        if value <= 0:\n",
    "            upper = midpoint\n",
    "        else:\n",
    "            lower = midpoint\n",
    "\n",
    "    return np.maximum(v - midpoint, 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Input: theta as an array and \n",
    "Ouput: array of probabilites corresponding to each state: [\\pi_{s_1}(.), ...., \\pi_{s_n}(.)]\n",
    "'''\n",
    "def project_to_policy(theta,n,m):\n",
    "    prob = []\n",
    "    prob_pers = []\n",
    "    for i in range(n):\n",
    "#         norm = np.sum(np.exp(theta[m*i:m*(i+1)]))\n",
    "        prob_pers = projection_simplex_sort(theta[m*i:m*(i+1)], z=1)\n",
    "        for j in range(m):\n",
    "            prob.append(prob_pers[j])\n",
    "            \n",
    "    return np.asarray(prob)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Input: theta as an array and \n",
    "Ouput: array of probabilites corresponding to each state: [\\pi_{s_1}(.), ...., \\pi_{s_n}(.)]\n",
    "'''\n",
    "def theta_to_policy(theta,n,m):\n",
    "    prob = []\n",
    "    for i in range(n):\n",
    "        norm = np.sum(np.exp(theta[m*i:m*(i+1)]))\n",
    "        for j in range(m*i,m*(i+1)):\n",
    "            prob.append(np.exp(theta[j])/norm)\n",
    "            \n",
    "    return np.asarray(prob)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Input: theta as an array and \n",
    "Ouput: array of probabilites corresponding to each state: [\\pi_{s_1}(.), ...., \\pi_{s_n}(.)]\n",
    "'''\n",
    "def revise_array_up(arr, x):\n",
    "    revised_arr = [num if num <= x else x for num in arr]\n",
    "    return revised_arr\n",
    "\n",
    "def revise_array_low(arr, x):\n",
    "    revised_arr = [num if num >= x else x for num in arr]\n",
    "    return revised_arr\n",
    "\n",
    "def theta_to_policy_cutoff(theta,n,m):\n",
    "    \n",
    "    # revise theta to be in some interval [-C, C]\n",
    "    C = 10000\n",
    "    revised_theta = revise_array_up(theta, C)\n",
    "    revised_theta = revise_array_low(revised_theta, -C)\n",
    "    \n",
    "#     print('Theta',theta)\n",
    "    \n",
    "#     print('Revised Theta',revised_theta)\n",
    "    \n",
    "    prob = []\n",
    "    for i in range(n):\n",
    "        norm = np.sum(np.exp(revised_theta[m*i:m*(i+1)]))\n",
    "        for j in range(m*i,m*(i+1)):\n",
    "            prob.append(np.exp(revised_theta[j])/norm)\n",
    "            \n",
    "    return np.asarray(prob)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Input: theta as an array and \n",
    "Ouput: array of probabilites corresponding to each state: [\\pi_{s_1}(.), ...., \\pi_{s_n}(.)]\n",
    "'''\n",
    "def theta_to_policy_naive(theta,n,m):\n",
    "    prob = []\n",
    "    for i in range(n):\n",
    "        norm = np.sum(theta[m*i:m*(i+1)])\n",
    "        for j in range(m*i,m*(i+1)):\n",
    "            prob.append(theta[j]/norm)\n",
    "            \n",
    "    return np.asarray(prob)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Get \\Pi_{\\pi}((s) -> (s,a)) in R^{|S| x |S||A|} matrix corresponding to the policy \\pi using the prob vector\n",
    "'''\n",
    "def get_Pi(prob,n,m):\n",
    "    Pi = np.zeros((n,n*m))\n",
    "    for i in range(n):\n",
    "        Pi[i,i*m:(i+1)*m] = prob[i*m:(i+1)*m]\n",
    "    \n",
    "    return Pi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Input: probability vector, state, action\n",
    "Output: \\nabla_{\\theta} \\pi_{\\theta}(s,a)\n",
    "\n",
    "States go from 0 to n-1 and actons from 0 to m-1\n",
    "'''\n",
    "def grad_state_action(prob,state,action):\n",
    "    grad = np.zeros(n*m)\n",
    "    for j in range(0,m):\n",
    "        if j == action:\n",
    "            grad[m*state + j] = prob[m*state + j]*(1-prob[m*state + j])\n",
    "        else:\n",
    "            grad[m*state + j] = -prob[m*state + action]*prob[m*state + j]\n",
    "            \n",
    "    return grad\n",
    "\n",
    "def grad_state(qvals,prob,state):\n",
    "    grad = np.sum([qvals[state*m + i]*grad_state_action(prob,state,i) for i in range(0,m)],axis=0)\n",
    "    return grad\n",
    "\n",
    "def grad(qvals,prob,d_pi):\n",
    "    grad = np.sum([d_pi[i]*grad_state(qvals,prob,i) for i in range(0,n)],axis=0)\n",
    "    return grad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Input: probability vector\n",
    "Output: Fisher information matrix\n",
    "        \\nabla_{\\theta} \\pi_{\\theta}(s,a) x {\\nabla_{\\theta} \\pi_{\\theta}(s,a)}^T\n",
    "'''\n",
    "def Fisher_info(prob,d_pi):\n",
    "    qvals_one = np.ones(n*m)\n",
    "    grad = np.sum([d_pi[i]*grad_state(qvals_one,prob,i) for i in range(0,n)],axis=0)\n",
    "    fisher = np.outer(grad,grad)+1e-3*np.identity(n*m)\n",
    "    return fisher"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "The overall reward function \\ell(\\theta)\n",
    "'''\n",
    "def ell(qvals,prob,rho):\n",
    "    V = np.zeros(n)\n",
    "    for i in range(n):\n",
    "        V[i] = np.sum([qvals[i*m + j]*prob[i*m + j] for j in range(m)])\n",
    "    \n",
    "    ell = np.dot(V,rho)\n",
    "    return ell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "The overall reward advantage function \\ell(\\theta)\n",
    "'''\n",
    "def avals(qvals,prob):\n",
    "    V = np.zeros(n)\n",
    "    for i in range(n):\n",
    "        V[i] = np.sum([qvals[i*m + j]*prob[i*m + j] for j in range(m)])\n",
    "    \n",
    "    A = qvals\n",
    "    for i in range(n):\n",
    "        for j in range(m):\n",
    "            A[i*m + j] = qvals[i*m + j] - V[i]\n",
    "        \n",
    "    return A"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "The projection function\n",
    "Input: a scalar \n",
    "Output: a scalar in the interval [0 C]\n",
    "'''\n",
    "def proj(scalar,gamma):\n",
    "    offset = 1000/(1-gamma)\n",
    "    if scalar < 0:\n",
    "        scalar = 0\n",
    "\n",
    "    if scalar > offset:\n",
    "        scalar = offset\n",
    "\n",
    "    return scalar"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Policy Iteration to check feasibility "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_vec = np.random.uniform(0,1,size=(n,m))\n",
    "prob_vec = raw_vec/raw_vec.sum(axis=1,keepdims=1)\n",
    "init_policy = prob_vec.flatten()## Policy Iteration to get the optimal policy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Policy iteration function\n",
    "'''\n",
    "def policy_iter(q_vals,n,m):\n",
    "    new_policy = np.zeros(n*m)\n",
    "    for i in range(n):\n",
    "        idx = np.argmax(q_vals[i*m:(i+1)*m])\n",
    "        new_policy[i*m + idx] = 1\n",
    "    \n",
    "    return new_policy       "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting policy [0.022861 0.022257 0.496791 0.417151 0.040940 0.260055 0.020741 0.239421\n",
      " 0.113255 0.366528 0.077009 0.252897 0.293405 0.147306 0.229383 0.234235\n",
      " 0.114754 0.214643 0.266094 0.170273 0.019934 0.289380 0.444991 0.163370\n",
      " 0.082325 0.184145 0.550146 0.115828 0.008301 0.141579 0.193071 0.128147\n",
      " 0.056816 0.408135 0.213831 0.164747 0.271480 0.219975 0.227132 0.116667\n",
      " 0.170695 0.097732 0.193326 0.297322 0.240925 0.204368 0.076399 0.025505\n",
      " 0.397828 0.295900 0.232097 0.206203 0.026091 0.237565 0.298044 0.098219\n",
      " 0.185154 0.083181 0.392678 0.240769 0.392924 0.090288 0.311557 0.079063\n",
      " 0.126168 0.106657 0.096738 0.356238 0.324030 0.116337 0.184156 0.268226\n",
      " 0.030368 0.122968 0.394283 0.506945 0.038054 0.101757 0.114022 0.239222\n",
      " 0.075823 0.464715 0.061802 0.241150 0.156510 0.268392 0.088919 0.131564\n",
      " 0.277810 0.233316 0.377841 0.216636 0.191506 0.064988 0.149028 0.467008\n",
      " 0.035160 0.104154 0.047115 0.346563]\n",
      "Final policy [0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 1.000000 0.000000\n",
      " 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000\n",
      " 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000\n",
      " 1.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000\n",
      " 0.000000 1.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000\n",
      " 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000\n",
      " 0.000000 1.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000\n",
      " 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000\n",
      " 1.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 0.000000\n",
      " 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000\n",
      " 0.000000 0.000000 1.000000 0.000000 0.000000 1.000000 0.000000 0.000000\n",
      " 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000\n",
      " 1.000000 0.000000 0.000000 0.000000]\n"
     ]
    }
   ],
   "source": [
    "curr_policy = np.random.uniform(0,1,size=(n*m))\n",
    "new_policy = init_policy\n",
    "print('Starting policy',init_policy)\n",
    "\n",
    "while np.count_nonzero(curr_policy - new_policy) > 0:\n",
    "    curr_policy = new_policy\n",
    "    Pi = get_Pi(curr_policy,n,m)\n",
    "    mat = np.identity(n*m) - gamma*np.matmul(prob_transition,Pi)\n",
    "    q_vals_utility = np.dot(np.linalg.inv(mat),utility)\n",
    "    new_policy = policy_iter(q_vals_utility,n,m)\n",
    "    \n",
    "print('Final policy',new_policy)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5.556458336351498\n"
     ]
    }
   ],
   "source": [
    "ell_utility_star = ell(q_vals_utility,new_policy,rho)\n",
    "print(ell_utility_star)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compute the optimal reward value from LP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimal reward value: 8.163862517858446\n",
      "Optimal policy: [0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000\n",
      " 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000\n",
      " 0.999999 0.000001 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000\n",
      " 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000\n",
      " 0.000000 1.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000\n",
      " 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000001\n",
      " 0.000000 0.999999 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000\n",
      " 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000\n",
      " 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.350830 0.000000\n",
      " 0.649170 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000\n",
      " 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 1.000000 0.000000\n",
      " 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000\n",
      " 0.000000 0.000000 0.000000 1.000000]\n"
     ]
    }
   ],
   "source": [
    "## linear programming solver\n",
    "\n",
    "# minimize c @ x\n",
    "# \n",
    "# such that \n",
    "#          A_ub @ x <= b_ub\n",
    "#          A_eq @ x == b_eq\n",
    "#          lb <= x <= ub\n",
    "\n",
    "c = -reward\n",
    "A_ub = -utility.reshape(1, n*m)\n",
    "b_ub = np.zeros(1)\n",
    "\n",
    "prob_transition_lp = np.transpose(prob_transition)\n",
    "\n",
    "E_sum = np.full_like(prob_transition_lp, 0)\n",
    "for i in range(n):\n",
    "    E_sum[i,m*i:m*(i+1)] = np.ones(m)\n",
    "    \n",
    "A_eq = E_sum - gamma*prob_transition_lp\n",
    "b_eq = rho\n",
    "\n",
    "lb = np.zeros(n*m)\n",
    "ub = np.ones(n*m)\n",
    "ub = ub/(1-gamma)\n",
    "bounds = np.transpose([lb, ub])\n",
    "\n",
    "eps = 0.001\n",
    "\n",
    "res = linprog(c, A_ub, b_ub, A_eq, b_eq, bounds)\n",
    "\n",
    "print('Optimal reward value:',-res.fun)\n",
    "\n",
    "print('Optimal policy:',theta_to_policy_naive(res.x,n,m))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Regularized NPG primal dual method "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Policy search via regularized NPG primal dual method \n",
    "Input: n, m, prob_transition, gamma, reward, utility, stepsize, tau, total_iterates   \n",
    "Output: function values of reward and utility  \n",
    "'''\n",
    "\n",
    "def Reg_NPG_primal_dual(n, m, prob_transition, gamma, reward, utility, stepsize, tau, total_iterates):\n",
    "\n",
    "    theta = np.random.uniform(0,1,size=n*m)\n",
    "    dual = 0\n",
    "    reward_value = []\n",
    "    utility_value = []\n",
    "\n",
    "    for k in range(total_iterates):\n",
    "        prob = theta_to_policy_cutoff(theta,n,m)\n",
    "\n",
    "        Pi = get_Pi(prob,n,m)\n",
    "        mat = np.identity(n*m) - gamma*np.matmul(prob_transition,Pi)\n",
    "        qvals_reward = np.dot(np.linalg.inv(mat),reward)\n",
    "        qvals_utility = np.dot(np.linalg.inv(mat),utility)\n",
    "\n",
    "        P_theta = np.matmul(Pi,prob_transition)\n",
    "        d_pi = (1-gamma)*np.dot(np.transpose((np.linalg.inv(np.identity(n) - gamma*P_theta))),rho)\n",
    "    \n",
    "        # entropy \n",
    "        qvals_entropy = np.dot(np.linalg.inv(mat),-np.log(prob))\n",
    "    \n",
    "        # gradient \n",
    "        qvals = qvals_reward + dual*qvals_utility + tau*qvals_entropy\n",
    "    \n",
    "        # natural gradient ascent\n",
    "        theta += stepsize*avals(qvals,prob)\n",
    "    \n",
    "        # dual desceent \n",
    "        violation_gradient = ell(qvals_utility,prob,rho)\n",
    "        dual = (1-stepsize*tau)*dual - stepsize*violation_gradient\n",
    "        dual = proj(dual,gamma)\n",
    "    \n",
    "    \n",
    "        if k % 1 == 0:\n",
    "        \n",
    "        # record iterates\n",
    "        \n",
    "            # record values\n",
    "            avg_reward = ell(qvals_reward,prob,rho)\n",
    "            avg_utility = ell(qvals_utility,prob,rho)\n",
    "            reward_value.append(avg_reward)\n",
    "            utility_value.append(avg_utility)\n",
    "            \n",
    "    return reward_value, utility_value\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Optimistic NPG primal dual method (OGDA version)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Policy search via optimistic NPG primal dual method\n",
    "Input: n, m, prob_transition, gamma, reward, utility, stepsize, total_iterates   \n",
    "Output: function values of reward and utility  \n",
    "'''\n",
    "\n",
    "def Opt_NPG_primal_dual(n, m, prob_transition, gamma, reward, utility, stepsize, total_iterates):\n",
    "\n",
    "    theta = np.random.uniform(0,1,size=n*m)\n",
    "    theta_h = np.random.uniform(0,1,size=n*m)\n",
    "    dual = 0\n",
    "    dual_h = 0\n",
    "    reward_value = []\n",
    "    utility_value = []\n",
    "    for k in range(total_iterates):\n",
    "    \n",
    "        # optimistic step for (theta, dual)  \n",
    "        Pi = get_Pi(theta,n,m)\n",
    "        mat = np.identity(n*m) - gamma*np.matmul(prob_transition,Pi)\n",
    "        qvals_reward = np.dot(np.linalg.inv(mat),reward)\n",
    "        qvals_utility = np.dot(np.linalg.inv(mat),utility)\n",
    "        violation_gradient = ell(qvals_utility,theta,rho)\n",
    "    \n",
    "        # gradient \n",
    "        qvals = qvals_reward + dual*qvals_utility\n",
    "\n",
    "        # natural gradient ascent\n",
    "        theta = project_to_policy(theta_h + stepsize*qvals,n,m)\n",
    "    \n",
    "        # dual descent \n",
    "        dual = dual_h - stepsize*violation_gradient\n",
    "        dual = proj(dual,gamma)\n",
    "\n",
    "        # optimistic step for (theta_h, dual_h)    \n",
    "        Pi = get_Pi(theta,n,m)\n",
    "        mat = np.identity(n*m) - gamma*np.matmul(prob_transition,Pi)\n",
    "        qvals_reward = np.dot(np.linalg.inv(mat),reward)\n",
    "        qvals_utility = np.dot(np.linalg.inv(mat),utility)\n",
    "        violation_gradient = ell(qvals_utility,theta,rho)\n",
    "        \n",
    "        # gradient \n",
    "        qvals = qvals_reward + dual*qvals_utility\n",
    "    \n",
    "        # natural gradient ascent\n",
    "        theta_h = project_to_policy(theta_h + stepsize*qvals,n,m)\n",
    "    \n",
    "        # dual desceent \n",
    "        dual_h = dual_h - stepsize*violation_gradient\n",
    "        dual_h = proj(dual_h,gamma)\n",
    "    \n",
    "        if k % 1 == 0:\n",
    "            avg_reward = ell(qvals_reward,theta_h,rho)\n",
    "            avg_utility = ell(qvals_utility,theta_h,rho)\n",
    "            reward_value.append(avg_reward)\n",
    "            utility_value.append(avg_utility)\n",
    "            \n",
    "    return reward_value, utility_value"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## NPG primal method "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "'''\n",
    "Policy search via NPG primal dual method \n",
    "Input: n, m, prob_transition, gamma, reward, utility, stepsize, total_iterates   \n",
    "Output: function values of reward and utility  \n",
    "'''\n",
    "def NPG_primal(n, m, prob_transition, gamma, reward, utility, stepsize, total_iterates):\n",
    "\n",
    "    alpha = 1\n",
    "    beta = 0.7\n",
    "    theta = np.random.uniform(0,1,size=n*m)\n",
    "    reward_value = []\n",
    "    utility_value = []\n",
    "\n",
    "    for k in range(total_iterates):\n",
    "        prob = theta_to_policy(theta,n,m)\n",
    "\n",
    "        Pi = get_Pi(prob,n,m)\n",
    "        mat = np.identity(n*m) - gamma*np.matmul(prob_transition,Pi)\n",
    "        qvals_reward = np.dot(np.linalg.inv(mat),reward)\n",
    "        qvals_utility = np.dot(np.linalg.inv(mat),utility)\n",
    "\n",
    "        P_theta = np.matmul(Pi,prob_transition)\n",
    "        d_pi = (1-gamma)*np.dot(np.transpose((np.linalg.inv(np.identity(n) - gamma*P_theta))),rho)\n",
    "    \n",
    "        # primal method that uses NPG as policy update \n",
    "        if ell(qvals_utility,prob,rho) >= -1/np.sqrt(k+1):\n",
    "            gradient = grad(qvals_reward,prob,d_pi)\n",
    "        else:\n",
    "            gradient = grad(qvals_utility,prob,d_pi)\n",
    "    \n",
    "        # natural gradient\n",
    "        Fisher_inv = np.linalg.pinv(Fisher_info(prob,d_pi))\n",
    "        natural_gradient = np.matmul(Fisher_inv,gradient)\n",
    "            \n",
    "        # natural gradient ascent\n",
    "        theta += stepsize*natural_gradient\n",
    "\n",
    "\n",
    "        if k % 1 == 0:\n",
    "        \n",
    "        # record iterates\n",
    "        \n",
    "            # record values\n",
    "            avg_reward = ell(qvals_reward,prob,rho)\n",
    "            avg_utility = ell(qvals_utility,prob,rho)\n",
    "            reward_value.append(avg_reward)\n",
    "            utility_value.append(avg_utility)\n",
    "        \n",
    "    return reward_value, utility_value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "total_iterates = 1060\n",
    "stepsize = 0.1\n",
    "\n",
    "# call regularized NPG primal dual method\n",
    "\n",
    "# total_iterates = 1000\n",
    "tau = 0.08\n",
    "\n",
    "reward_value_reg, utility_value_reg = Reg_NPG_primal_dual(n, m, prob_transition, gamma, reward, utility, stepsize, tau, total_iterates)\n",
    "\n",
    "# call optimistic NPG primal dual method\n",
    "\n",
    "reward_value_opt, utility_value_opt = Opt_NPG_primal_dual(n, m, prob_transition, gamma, reward, utility, stepsize, total_iterates)\n",
    "\n",
    "# call NPG primal method\n",
    "\n",
    "stepsize = 0.1\n",
    "\n",
    "reward_value_primal, utility_value_primal = NPG_primal(n, m, prob_transition, gamma, reward, utility, stepsize, total_iterates)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3de3xU1bnw8d8zM0kmCYFAuJpgUKF4wSI0RbBqFUXRWrBHa/XTHrVaqdbam/a8p/VSe7MXe47aehSpl6OntvrKqbceb1Rtq++xXhBUQKsoRggBkhByv0xmnvePtWcyGQJMQpLJnjzfz2d/Zs/ea9Z+9mx4smfttfYWVcUYY4z/BTIdgDHGmIFhCd0YY7KEJXRjjMkSltCNMSZLWEI3xpgsEcrUhsePH6/Tpk3L1OaNMcaXVq9eXauqE3pbl7GEPm3aNF577bVMbd4YY3xJRCr3tM6aXIwxJktYQjfGmCxhCd0YY7KEJXRjjMkSltCNMSZLWEI3xpgsYQndGGOyRMb6oZs+UIVIBNraoL3dvQIEg24Khbrnu7pc2c7OnlNvy7q6QAQCgZ6v8e31VkdX1+5T8i2YRXrG3dvU2QkdHW5fOjrc1Nnpth/fj9QpEOg5we5xRKM95+NTfD9Tv6u9TbFYzzpS3ydPqrvHF5+S44/Pi7j64pNqd/29bTf+vSYfo/j+JG1HAwGisRjBYBDp7XtP3kbytlOPXXw7gQAaCNDU3My2mhq219QQi8XID4cJh8OE8/ISr7k5OeSGQuQEg4QCASS17lTJ/+aS92lP//6Tv6f4627FlJbWVjQWIxgKuSk3l2AohOyp/t6+o/j83sTrS663l/o3vP02xGJMnz6d3FAIVGltbaVgzhz493/f+zb6wRL6IND2dqKbNhGqq4Pt22HHju7X+nqXGFOn5CQXT3Tx+ba2Xv8BG5NMGPj/0AKM9qaPDXDdA02AUZkOIsXh8Zn3308sK4Duk7IBZgk9Hap0bNzI5nXrqKmqonbrVuqqq9m5bRstO3YwZudOJjY2UtraSnlnJ2Wx2IB/sV0itKki+fmMmjABRIi0t1O3Ywf5ubmMKSiAaJRYMEhbNEo0GKRLhIiIew0EiHjvIyJ0AREgFotRUlzMzBkzQJX21lZefuUVgvn5HLtwIeTmQm4uDz7yCM1tbbRHo3R4UwSIAvE/NcnnJ8WjR/OdK69MnMHc8LOf0dzayvevvppRY8dCXh43L1/O6vXr6QA6ce1/wZQp5NUb8KYbf/5zRo8aBarcfuedrHvnHS77+teZddRREArx8OOP8/iTT6IixESIeWeZARECqgRFyIHEfCAW47Jlyzh0xgyIRnnqf/6Hvzz3HKcsXszCk0+GYJDX1qzhlltvpTMaJertc/KkKTEGgC+ddx7nnHUWRKM8/+yz3LliBccdcwyXXnIJxGJs+ugjrvvhD1Hv+9Ok+mIp8/Hv9rRTT+WKr38dYjFefeUVbvjpTznqyCP5wTXXQCxGZ3s7X/vqV2nv7ES9OpOn5Lpj3rRgwQKuueYaUGXHjh1cdNFFTBw/nrvvugtiMTQa5btXXkn51KnMnjWLgsJCGhoa2NXQ0OO1sbmZ5tZWGpub+fSJJ/K1yy8HET7YtIkrvvENDpo2jVtvvRUAjcU4c+lSUCWQ9N2JF2dvph54IDfdckvirH7pmWeSFw7z4IMPurNv4Bvf+AaNjY0U5OfT0d6emLoiEXo7958wYQJ3LF+e+Df6pfPPp6Gpid/dfz9jxowBEa655hpeX7Omx+ckZUr9t3/fvfe6z6vyzKpVNDU1cebnPkcwFAIRnn7mGU4999w97On+sYTei81vvcX6e+7hkNpaZtTVwSuvkFdby3RgehqfjwKbgEBpKeUVFTBpEh+2tvJvv/sdE2fO5Nof/xhycmiPxfjsWWcRwSW0DqDde41Pbd4U9X4CLj3lFB555BEAHnnoIc455xyW
nHoqjz76KABVmzdz4IEH9ml/PztvHo899hgAdVVVnFBWRmlxMVsefDBR5orHH6cm5awiFAqRl5dHTk4OwWCQQCBAMBgkGAwydepUvnPddYmy/3flSurq6rjy299mVEkJAGtefZUXmpsT/yGj0SiRSISurq4er7FYjGg0iqry04svhvHj3f4//jjPvPEGSxYtYtappwLw6ptvck99fZ/2/7RPfYpDFy8G4IWNG/nFf/83hYcdxsKLLgJgy5gx/O6WWwAYNWoUY8aMobi4OPGal5dHc3Mz9V5ya2ho4LTjjoOzzgKg5NBDGR8OU3LssfD5zwOQv20bx0yaxM6dO6mrq6Ouro7m5mZaW1t7TC0tLXR2dhKJRDjiyCPhjDMAmHTUUby9ciWTjjkGzjkHgFzgtxdcQCQSob6+PlHvzp07qa+vp6mpqcfU3NxM0Zw5cPrpAHRs3kzNvHkEJ0+GJUsAl6h+5e1HumKxWKJZbHJrK9fPn09OTg4cdZRbH41y5l130djYmPi+GhsbaWpqoqOjg46ODjo7OxPzHR0dyMc+Bmeemaj/+cJCJkyYQO3RRzNhgrutyS8WLiQ/P3+3eKLRKC0tLT2219DQ4B380xLlpn/nOzQ0NBA+6yzIy3P7/9JLyJQpdHZ2JqZoNEpXVxddXV2J+ZKSEk444QQWLlxI+LjjEp8/xYs52aneMRwMkqlH0FVUVGgm7uUSjUZZu3Ytf/nLX/jgnXcIvf8+RZs3808zZzI3NxfWr4d33939cyUlbGxqQvLyCITDBPPzySkoIGfUKGJTp6LTpxM89FDyZs2i8IgjCITDdHZ2Jv6Bbdiwgd/+9reUlpZy1VVXAbBjxw7mzJnDrl276OjoIDc3l5ycnMRrTk4OZWVlzJ8/n/nz57NgwQLKysoSMakq1dXVtLa2Mn26+1PT1NTEd7/7XQoKChJTfn5+jzqTp7y8PKZMmcLcuXMBiEQivPvuu+Tl5SXqBKiuriYQCJCXl5eYAvG27CES/7ca/wPQ1tZGJBIhPz/fJQygsbGR5uZmYrEYsVgMVSUajSY+H68jPi8iHHDAARQWFgJQX19PY2MjY8eOZfTo0QCJ/7ihUIhgMDik+2xMKhFZraoVva7L9oQei8VYt24dzz33HM8//zx/+9vfGL1rF3cCJ9L7T5RYTg4bR4+mffZsPn7JJTB/PpSX7/mijTHGDJG9JfSsbnKJRCJ85jOfYdWqVYllS4B7AwGKYzFUhOZJk+iYPp28OXMYNX8+HHEEgUMP5WPeTyZjjPGLtBK6iHwb+ArumsVbwJdVtT1pfR5wH/AJoA74gqp+OODR9tHVV1/NqlWrKC4u5szTT+eq2lqOeOYZ12PkM59B7rmHURMmDLsr48YY0x/7bAQVkVLgG0CFqs7CdT5IvUR7MVCvqtOBm4BfDHSgffXkk09y4403EgwGWbViBfe8955L5qEQ/OpX8NhjMKHXe8QbY4wvpdvkEgLyRSSC60a5NWX9UuB6b34lcKuIiGaqgR6YO3cuixYt4itlZVRccgk0NLh28AcecG3ixpeiUdi6FXJyYNKkvl3WaGlxQwFqatz7efN6rvOui+6VKtTWQlWVi6Oqyv3TGj/exZPUaSJtGzfCn//sfjgWFLg44lNBgesc4l3zRbV7n1Vd3LW1bioqgpkz3bqGBnjySTcv4s5jxoyB4mI3lZVBOOzWt7e78s3N0NTkXtva3NCIggI48cTuWB94wH1u9GhX3+jRbrvgXuPf4c6dsHmzq6e1tXtqaXH1Xnyxiwlg7Vo3FCMvr+eYLBEX66RJrtyuXfD3v3fXE4u5XrU5OW769KddeYBNm6Curuf6zk4XT14ezJrV/R3+8Y/d31Py+K3cXFcu3g+hvt4d89ThI5GIi8XraAW441lf78Z+eT1/yc11254yBQbtYW3xq/17m4BvAs1ADXB/L+vXAWVJ798Hxu+tzk984hM62KKRiMYmTnTjvpYuVd25c9C36WexmOqbb6o+
9JDqiy+qfvihakdH+p/v7FStru5+X1urevzxqqee6r7+c89Vvegi1euuU733XtUtW3p+vrlZ9aWXVG+7TfWSS1Q/+UnV4mLVz32uu8zWrd3D+kaNUp0zR/ULX1C99lrV++5TbWrqLnvLLarHHqs6bZpqfn7PIYGf/GTP/Q6HVceOdfUtXaq6ZInqokXu83/+c3fZa6/d0/BX9/lks2apjh6tWlKiOnmy6tSpqgcfrDppkost7ve/33OdoFpT01128WK33xMnqubl9Sx34YXd5dav33udzz3XXfaSS/Zcbvbsnt9TKLTnsnfc0V12+fK9bz8a7S5bUbHncsuWdZd76aW917l6dXfZiy/ec7n587vLRSJ7r3PFiu6yt92253KhUM9jf+SRey572WW6X4DXdA95dZ9n6CIyFncGfhCwC3hIRL6kqr/r6x8PEVkGLAMoLS2lsnKPT1LqtxdffJH58+cTCoXIfestpuzYQVdpKVU33wyNjW7KQqrw7rs5FBXFmDIl2ucOOdu3B/nSlyby3nu5PZaLKOPHR/nxj3eyeLHrh756dS5r1uQxblyMsWOjbNqUw4svhnn55TALFrRz553uFHjz5iB/+1vZbtuKu/vuHSxc6Oq8884ibrhhLLHY7oFv395GZeUOABobhcmTD6C9Xdi1K8iaNZA87mPlym1UVHQA8NZbY3nxxdGJdXl5MUpKYpSURCkvj1BZWQfAzp0BoJT6+gD19T3rA3jjjVqmT2/x6hjFmDHFTJ4cZeLEKJMnRykqirFrV4BQiESdAB9+OJXm5t5bNdevb6CychcA48blcO65RQSD0NYmtLUJra2BxPzOndtocZunpmYSzc1hmpt77tPYsVGKitqorGzwvqcgZ5wxNrG9SERoahIaGwM0Ngbo6KihsjICQChUTEnJKAoKYhQWKoWFMcJhJTdXKS/vorLS9evv6oIlS0pobAzQ3ByguVloanLzItDYWE9lpRco+Rx6aDHhsJKfr+Tnx8jP18T7zZt3JmI78MASOjtzaG+XRNqLxdx8ONyS2KfOzhDHHTfOq08JBJRIRLy7PQitrfVUVnYBMGbMGGbNyqerS+jsdGVyciAcjnHAAd3HPhqFxYvHo+q2B913AOjsFMLhRior3eXCjo5Cpk8fTSgEOTmaeHUTiX+jAPPmjaWsLEgsJt6ZvIsjEhHGjm2lsnJw8tA+uy2KyOeBxap6sff+fGC+qn4tqczTwPWq+pKIhIBtwATdS+WD0W3x6aefZvHixSxcuJBnnnmG4C9/Cd//PlxyCaxYMaDbGi7a2+H+++Hmm2HdOrfsK1+B3/7WzdfVwV//6n7ept4iJBaDL37RlYvFXItUWxssWOCaJbZsgepqt+6JJ7qbE66/Hn74w97jOfpoeOkl97O1rQ1efrn77gVtbe7n/EcfuZHQN9wABx3kPnfRRXDffXD44TBnjpuOOsq9HzMmMU6jh7o6eO89N2wg/nrFFXDssW7922+7Oy6UlrqfuYWFe79dyI4d8OGHrqkgGIT8fDcddhhMnNhdLt0/li0tvd/hoaDANSOE+tHHLBZz32FLi/teCgr6Xofxt/3ttvgRMF9ECnCDFk8CUjPxY8AFwEvA2cBze0vmgyU/P5/JkyezcOFCNwDkmWfcilNOGepQBt22bXDbbbB8eXebsHdHAJLGA/H3vycGLO4mEIBFi1yyCgTg2Wddgo2314I7K9u2DcaN6142fz5885tuuzt2wOTJcPLJcNJJ3e2N4JLhCSektz8/+pHbn3i7bjpKSty0p0sihx3mpnSIuCQ7aZL7o7S3culKp02+rwIB1249evS+y5qRJ62BRSLyQ+ALQBewBteF8WpcW85jIhIG/guYA+wEzlXVD/ZW52ANLKqtrWXcuHEEWltdFopG3RWjsWP3/eFhYO1al9QOPthdRAF3NrZxo9uF+Kj+yy93CRDc2ey3vw1f+IL7TDTqzjABnnsObrrJnVmm3lCwqAi+9z23LWOM
P+ztDD2ti6KDMQ36RdHHH9/9Csgw8847ql/8ompLS/eyBQtc2IGAu5hXWtp9MeX663t+9swzVf/6V3exyhgzMrA/F0X9YuXKlTQ2NrJkyRLGjx8/rJtbolF3K+TrrnPty9OmwU9+4tYdeKBr4qisdO254JpApk/v7h4Grnvaww8PdeTGmOEsaxL6j3/8Y958803WrFnjEvrTT7sVyZ1Dh4H1690FwFdece8vvBCuvLJ7/QMPuNeODpfQQyF3sbI/F9CMMSNL1qSJ9nbXtSg/P99lwnffdVeOkkePZNAHH7ieKHfc4Xo6lJW5jjd7GoiSl9c9SMQYY9KRNc8UjSf0cDgM8ZtxnXRSRk9tOzu752tq4De/ccuWLXNdDPszqtAYY/Yk687Qw+FwRppb2trcj4ING9z0wguuJ8mzz7r18+bBD34AS5e6XinGGDPQsiaht3lP0wmHQt1ZdAguiP7+9+7i5qZNuz/2s6DA3c9h7FjXf/n66wc9HGPMCJY1CT3Rhr5+vbuLz/Tp3cMQB1Eo5EY9BoOuzfvww91gliOPdE0qY8YMegjGGANkSUKPP4tSRMh57jm3cIiaW045Bd56C2bM6H14ujHGDJWsSOgdHe5mTOFwGIlfEB3E5pann3b3+Pr857tvR2qMMZmWFb1c4s0tE3Nz3d2gQqH0byLSR9u2wT//s3vQevx+08YYMxxkVUI/ScQNwzzmmEG5e1EsBhdc4LognnTSsBuzZIwZ4bIroUejbsEgNbfcdJO7o0BJibvVayArvj1jTLbIipQUT+jHe6+DkdBXr3Z3JgS45x444IAB34QxxuyXrEjokydP5sGf/pSySMSdPs+dO6D1NzbCeee5BxRccQV89rMDWr0xxgyIrEjo48aN45x4h++TT+6+GfgAaWhwN7A98kj45S8HtGpjjBkwWdFtEYDnn3evixYNeNVTp8KLL7qnjffliTrGGDOUsuIMfdOmTWx94w33ZsaMAalTFf70JxIPjp00aUgGnhpjTL9lRUJfvXo1WzdudG8G4Km5qvDd77q28muu2e/qjDFmSGRFQp82bRql8eGaA5DQf/5z+Ld/c08KOv74/a7OGGOGRFYk9IqKCqbEL4ruZ0K/4w74/vfd3RF/9zsbPGSM8Y99JnQRmSkia5OmRhH5VkqZE0SkIanMdYMX8h54t88lP7/fVaxfD1/7mptfvtwN7zfGGL/YZy8XVf0HcBSAiASBKqC3xxO/oKpnDGx46dmyZQtTmpsJwn6dod90kxvev2yZm4wxxk/62uRyEvC+qlYORjD9dc/dd7s+hdDvM/SuLnjpJdfUctVVAxicMcYMkb72Qz8X+MMe1i0QkTeArcBVqro+tYCILAOWAZSWllJZOTB/F+qqqwkCXcEgVVVV/a7n8cdh7do8cnM7GKDQjDFmyKSd0EUkF1gCfK+X1a8D5araLCKnA48Au3UIV9UVwAqAiooKLS8v71fQqUZ5d8nqys1lf+s8+OCBiMgYY4ZeX5pcTgNeV9XtqStUtVFVm735J4AcERk/QDHuU6y5GXAJvT9Wr4bq6oGMyBhjhl5fEvp57KG5RUQmi4h48/O8euv2P7z0qNd+Hu1HQleFCy+E8nI3vN8YY/wqrSYXESkEFgFfTVp2KYCqLgfOBi4TkS6gDThXNT5ofvBpSwsAsX7caOXPf4Z162DKFJg3b6AjM8aYoZNWQlfVFqAkZdnypPlbgVsHNrQ+8Pqg9yeh33STe/3616GfLTbGGDMsZMVIUfGaXLSPXRbffts9FzQ/H7761X2XN8aY4Sw7Eno/R4nefLN7Pf9891wMY4zxs6xI6IH4o+f6kNBra91zQQG+9a29lzXGGD/IigdcBDo6AJDCwrQ/09jonoWhCoceOliRGWPM0MmKhB7s7AT6ltAPPhgee8w9J9QYY7JBVjS5LPj4xwHIGzu2z5/NyRnoaIwxJjOyIqGfcuyxABROmJBW+Q0bYNUq2LlzMKMyxpihlRUJPXGn
xTRvnXvPPXDKKfCb3wxiTMYYM8SyIqFXv/8+kP7Aotdfd69z5w5WRMYYM/R8n9BVlT899JB7k0a3RVVYs8bNz5kziIEZY8wQ831Cj0ajHDje3dgxMGrUPstXVkJ9PUyYAKWlgx2dMcYMHd8n9FAoxKnHHefepNGGHj87nzvXPZ3IGGOyhe8TOtCni6Lx9nNrbjHGZBvfJ/RYLEbMu31uOgn9o4/cq10QNcZkG98n9Pfee4/X40+mSOOi6L33Qk0NnH76IAdmjDFDzPdD/9vb20mk8TT7oY8fsofjGWPM0PH9GXp7ezuJNL6PhD50z1AyxpihN6IS+s9+BtOnw3/+52BHZYwxQ29EJfTVq+H99yEYHPSwjDFmyPk+obe1tnYn9H1cFE3ug26MMdlmnwldRGaKyNqkqVFEvpVSRkTk1yKyUUTeFJEhS5mdzc0Ega5AAEJ7vsZbXw+bNkE4DDNnDlV0xhgzdPbZy0VV/wEcBSAiQaAKeDil2GnADG86Grjdex10XU1NAHQEg3vdmfjZ+ezZe837xhjjW31tcjkJeF9VK1OWLwXuU+fvQLGITBmQCPehq7ERgMg+nlRhN+QyxmS7vp6rngv8oZflpcDmpPdbvGXVyYVEZBmwDKC0tJTKytS/C323c8sWwJ2h762+F14YDxRSXl5HZWXzfm/XGGOGm7QTuojkAkuA7/V3Y6q6AlgBUFFRoeXl5f2tKqHI67ISC4fZW33f+Q5UVMBZZ5VQXl6y39s1xpjhpi9n6KcBr6vq9l7WVQFTk96XecsGnXr3cYnm5u613PHHu8kYY7JVX9rQz6P35haAx4Dzvd4u84EGVa3eQ9kBNamoCIBgGvdCN8aYbJZWQheRQmAR8MekZZeKyKXe2yeAD4CNwG+Brw1wnHv02ZNPBmDKIYfsscyqVfCrX8H69UMVlTHGDL20mlxUtQUoSVm2PGlegcsHNrQ0pXEv9AcegLvvhrw8OOKIIYrLGGOGmO9HinY2NACge0no9lBoY8xI4PuEft/ttwOwcevWXtd3dMC6de5xc7NnD2VkxhgztHyf0POiUQBkD2fo69ZBVxd87GNg102NMdnM9wn9n886C4BDZs3qdb3dkMsYM1L4PqHT1gaAFBb2utraz40xI4X/E/o+erkUFsIBB9g9XIwx2c/3Cf3ph92NH7d7d11MdeONUFUFCxcOZVTGGDP0fJ/Q23fuBNy9XPZGZCiiMcaYzPF9Qs/t6gIg5N0CIFlHR6KJ3Rhjsp7vE3q822LOmDG7rXvqKde0/sUvDnVUxhgz9Hyf0HP3ktA//NC9FhcPYUDGGJMhvk/o4VgMgNxesnY8oU+bNnTxGGNMpvg6oXd1dZHvzYdGj95tvSV0Y8xI4uuE3tHRQbz3eW8DiyyhG2NGEl8n9Pb29kRCJz9/t/WW0I0xI4mvE3pbW1t3Qk8ZKdrQALt2uTw/fvyQh2aMMUOuL88UHXba29qYHH+TcoaelwePPuqSug0qMsaMBL5O6B3NzYSACJCTk9NjXTgMS5ZkJCxjjMkIXze5RL37t7QHgxmOxBhjMs/XZ+gfnz4dgKKJE3db99BDsHEjLF0Khx8+1JEZY8zQS+sMXUSKRWSliLwjIm+LyIKU9SeISIOIrPWm6wYn3BTxW+f20sPlD3+A73/fPbHIGGNGgnTP0G8BnlLVs0UkF+jt5uMvqOoZAxdaGvZyL3TrsmiMGWn2eYYuImOA44G7AFS1U1V3DXZg6fjfZ58F4KPa2t3WWUI3xow06ZyhHwTUAPeIyGxgNfBNVW1JKbdARN4AtgJXqer61IpEZBmwDKC0tJTKysr9Cn7rxo0ANHR19airsVGorz+QcDhGa+tm9nMzxhjjC+kk9BAwF7hCVV8WkVuAfwWuTSrzOlCuqs0icjrwCDAjtSJVXQGsAKioqNDy8vL9Cn7CCSfAbbdxyJFHUpBU15tvuteDDgowbdr+bcMYY/winYuiW4Atqvqy934lLsEnqGqjqjZ7
808AOSIy6OMz4y3nBSUlPZZbc4sxZiTaZ0JX1W3AZhGZ6S06CdiQXEZEJou48ZgiMs+rt26AY93dHnq5dHXBIYfAjN1+IxhjTPZKt5fLFcD9Xg+XD4Avi8ilAKq6HDgbuExEuoA24FxV1cEIONnrL77IXGDT9u0clLT8n/7JTcYYM5KkldBVdS1QkbJ4edL6W4FbBzCutGzbtAmA7c3NPRK6McaMRL4e+i/xJ0CnNLl0dGQgGGOMyTBfJ/RAe7ubSRlYdMABMHEi7NyZgaCMMSZDfH0vl94SemOjS+ThMIwdm6HAjDEmA3x9hh702lYkKaHHBxFNm2b3QTfGjCz+TuidnQAERo1KLLM+6MaYkcrXCT1kCd0YYxL8ndAjEQCCRUWJZfEml/28q4AxxviOrxN6bi8J3c7QjTEjla97ueRGowDkjB6dWPatb8GJJ8L8+ZmKyhhjMsPXCT2vqwuAnDFjEsuOPdZNxhgz0vi6ySUvFgN6JnRjjBmpfJ3QpxQXAzD54IMB2LwZfvITePzxTEZljDGZ4euEHvIGFuV6if2NN+Daa+E//iOTURljTGb4N6Gr7nY/dOvhYowZyXyb0LWzE6JRukTQkLu2awndGDOS+Tahd+7aBUCLKt7DkiyhG2NGNN8m9PiNuXKTerhYQjfGjGS+TejxYf/5SQ+ItoRujBnJ/DuwKH5B1Lt1bkcHjBvnHhA9aVIG4zLGmAzx7Rn6rq1bAaj1EnteHrz7LtTX233QjTEjU1oJXUSKRWSliLwjIm+LyIKU9SIivxaRjSLypojMHZxwu9V+9BEAH27fnhLrYG/ZGGOGp3SbXG4BnlLVs0UkFyhIWX8aMMObjgZu914HTVdjIwCdXpfFWAwCvv29YYwx+2+fKVBExgDHA3cBqGqnqu5KKbYUuE+dvwPFIjJlwKNNEm1qAiCSkwPA//k/UFICd901mFs1xpjhK50z9IOAGuAeEZkNrAa+qaotSWVKgc1J77d4y6qTKxKRZcAygNLSUirjT6Poh51eG3p7IEBlZSUbNoxn585CWlpqqKxs7Xe9xhjjV+kk9BAwF7hCVV8WkVuAfwWu7evGVHUFsAKgoqJCy/fjsUL1XvuKFhRQXl6ON86IOXMm2NOKjDEjUicBliYAAAvSSURBVDqtzluALar6svd+JS7BJ6sCpia9L/OWDZpYczMA0bw8AHbscMsnThzMrRpjzPC1z4SuqtuAzSIy01t0ErAhpdhjwPleb5f5QIOqVjOYvO6KMUvoxhgDpN/L5Qrgfq+HywfAl0XkUgBVXQ48AZwObARagS8PQqw9JSX09nZobIRQCLw76RpjzIiTVkJX1bVARcri5UnrFbh8AOPaNy+ha34+NTVu0cSJ1g/dGDNy+Xbov7S3Ay6hFxXBrbdaMjfGjGy+T+gUFFBcDJcP7e8DY4wZdnw7tjKYlNCNMcb4OKGXe91Z5n7qU7z6Ktx7L2xI7XtjjDEjiG8T+uhgEIDyww5j5Uq48EJ49NHMxmSMMZnk24SefD9064NujDE+TugN3m1zP6qttYRujDH4OKG31dUB8O6WLZbQjTEGHyf00d590A+cOdMSujHG4OOEXqAKwIzZR1lCN8YY/JrQVRMXRVspQNV1Ry8szHBcxhiTQf4cKRqJQDRKLBhEciO0teXQ0rLvjxljTDbz5xl6WxsATdEoW7ZsQQRGjcpwTMYYk2H+TOiJ5hYIh8OZjcUYY4YJXyf0NuDJJ4v5+MfhxhszG5IxxmSarxN6K7B1ax5vvQW1tZkNyRhjMs33Cb2+3l3XtS6LxpiRzpcJPdrUBLiEXlfndsESujFmpPNlQu9saACgIxBgxw73mCJL6MaYkc6XCT0ST+jBYGKU6IQJGQzIGGOGgbQGFonIh0ATEAW6VLUiZf0JwKPAJm/RH1X1RwMXZk9djY0AREKhHg+INsaYkawvI0VPVNW99SV5QVXP2N+A0hFP6J2hEF/5Cmzdamfoxhjjy6H/8YuikZwc
fjRovwOMMcZf0k3oCjwjIgrcoaoreimzQETeALYCV6nq+tQCIrIMWAZQWlpKZWVlv4Juq65mCq4Nvb91GGNMtkk3oR+rqlUiMhFYJSLvqOrfkta/DpSrarOInA48AsxIrcT7Q7ACoKKiQsvLy/sVdENxMQDTP34M771XzrRpMH16v6oyxpiskVYvF1Wt8l53AA8D81LWN6pqszf/BJAjIuMHONaEMTk5AIwr+zSLFsG//MtgbckYY/xjnwldRApFpCg+D5wCrEspM1lExJuf59VbN/DheryRors68gG7IGqMMZBek8sk4GEvX4eA36vqUyJyKYCqLgfOBi4TkS7cPbPOVfUeKTQIWmtrKQAqa7oA67JojDGQRkJX1Q+A2b0sX540fytw68CGtmc1H31EOfDq+u2AJXRjjAGfjhQt8F6jeWWANbkYYwz4NKFP8B4emls8E7AzdGOMAZ8m9PhF0eoGd65uCd0YY3ya0KPNzQDceleIDRtgxm493o0xZuTxZUJvqK4G4KH/uZ/DDoO8vAwHZIwxw4AvE3qosxOAYFFRhiMxxpjhw5cJPScSAeDuB87i6qszHIwxxgwTvrzbYk6XG1D0+j8OpeB/MxyMMcYME/47Q49ECKkSQegix3q4GGOMx38J3euy2Ob9uLCEbowxjm8Teiuua4uNEjXGGMfHCT0M2Bm6McbE+Tihu1vnWkI3xhjHfwm9rQ2AVro45phGDjoow/EYY8ww4b9ui4mLotXceWcVhx02OsMBGWPM8OC/M/REkwuEw+HMxmKMMcOIbxN6eGw5kYidnRtjTJxvE/qW+uM48cSSDAdjjDHDh28TeisF1sPFGGOS+C6hqyV0Y4zpVVoJXUQ+FJG3RGStiLzWy3oRkV+LyEYReVNE5g58qE5XUxMAbeRbQjfGmCR96bZ4oqrW7mHdacAMbzoauN17HXAxL6G3UmDD/o0xJslANbksBe5T5+9AsYhMGaC6e8iLRgFrcjHGmFTpnqEr8IyIKHCHqq5IWV8KbE56v8VbVp1cSESWAcsASktLqays7HPA47ZvpwiX0IPBWiorW/pchzHGZKN0E/qxqlolIhOBVSLyjqr+ra8b8/4QrACoqKjQ8vLyvlYBN9zAls99nfOiB3HIseMpKxvf9zqMMSYLpdXkoqpV3usO4GFgXkqRKmBq0vsyb9mAe6upifnfPodf3b+MsrLB2IIxxvjTPhO6iBSKSFF8HjgFWJdS7DHgfK+3y3ygQVWrGQTNzc1UVVVRU1MzGNUbY4xvpdPkMgl4WETi5X+vqk+JyKUAqroceAI4HdiIu83KlwcnXGhrawduo7p6FNEoBIODtSVjjPGXfSZ0Vf0AmN3L8uVJ8wpcPrCh9W7XrghwGVVV7ZbMjTEmie9Gim7frgDk5jZmOBJjjBlefJfQd+xwr/n5ltCNMSaZ7xJ6XZ0LuaCgOcORGGPM8OLDhO6a/QsLWzMciTHGDC++S+j19TkAFBW1ZTgSY4wZXnyX0EOhFuBNSkqsDd0YY5L5LqEfddT/A2ZzzDFvZToUY4wZVnyX0Nvb2wF7QLQxxqTyXUIPh8OMHz+eoqKiTIdijDHDirhBnkOvoqJCX3ttt4cfGWOM2QsRWa2qFb2t890ZujHGmN5ZQjfGmCxhCd0YY7KEJXRjjMkSltCNMSZLWEI3xpgsYQndGGOyhCV0Y4zJEpbQjTEmS2RspKiI1ACV/fz4eKB2AMMZzkbKvo6U/QTb12w0lPtZrqoTeluRsYS+P0TktT0Nfc02I2VfR8p+gu1rNhou+2lNLsYYkyUsoRtjTJbwa0JfkekAhtBI2deRsp9g+5qNhsV++rIN3RhjzO78eoZujDEmhSV0Y4zJEr5L6CKyWET+ISIbReRfMx3P/hCRqSLyvIhsEJH1IvJNb/k4EVklIu95r2O95SIiv/b2/U0RmZvZPeg7EQmKyBoR+ZP3/iARednbpwdFJNdbnue93+itn5bJuPtCRIpFZKWIvCMi
b4vIgmw9piLybe/f7joR+YOIhLPlmIrI3SKyQ0TWJS3r83EUkQu88u+JyAWDGbOvErqIBIH/AE4DDgfOE5HDMxvVfukCrlTVw4H5wOXe/vwr8KyqzgCe9d6D2+8Z3rQMuH3oQ95v3wTeTnr/C+AmVZ0O1AMXe8svBuq95Td55fziFuApVT0UmI3b36w7piJSCnwDqFDVWUAQOJfsOab/CSxOWdan4ygi44AfAEcD84AfxP8IDApV9c0ELACeTnr/PeB7mY5rAPfvUWAR8A9girdsCvAPb/4O4Lyk8olyfpiAMu8/wULgT4DgRteFUo8v8DSwwJsPeeUk0/uQxj6OATalxpqNxxQoBTYD47xj9Cfg1Gw6psA0YF1/jyNwHnBH0vIe5QZ68tUZOt3/gOK2eMt8z/v5OQd4GZikqtXeqm3AJG/e7/t/M/AvQMx7XwLsUtUu733y/iT21Vvf4JUf7g4CaoB7vKalO0WkkCw8pqpaBfwK+Aioxh2j1WTfMU3W1+M4pMfXbwk9K4nIKOC/gW+pamPyOnV/1n3ft1REzgB2qOrqTMcyyELAXOB2VZ0DtND9sxzIqmM6FliK+yN2AFDI7k0UWWs4Hke/JfQqYGrS+zJvmW+JSA4umd+vqn/0Fm8XkSne+inADm+5n/f/U8ASEfkQeADX7HILUCwiIa9M8v4k9tVbPwaoG8qA+2kLsEVVX/ber8Ql+Gw8picDm1S1RlUjwB9xxznbjmmyvh7HIT2+fkvorwIzvKvoubgLMI9lOKZ+ExEB7gLeVtV/T1r1GBC/Gn4Brm09vvx874r6fKAh6effsKaq31PVMlWdhjtuz6nqF4HngbO9Yqn7Gv8OzvbKD6uzod6o6jZgs4jM9BadBGwgC48prqllvogUeP+W4/uaVcc0RV+P49PAKSIy1vtFc4q3bHBk+qJDPy5SnA68C7wPXJ3pePZzX47F/WR7E1jrTafj2hWfBd4D/gyM88oLrpfP+8BbuN4FGd+Pfuz3CcCfvPmDgVeAjcBDQJ63POy93+itPzjTcfdh/44CXvOO6yPA2Gw9psAPgXeAdcB/AXnZckyBP+CuDURwv7wu7s9xBC7y9nkj8OXBjNmG/htjTJbwW5OLMcaYPbCEbowxWcISujHGZAlL6MYYkyUsoRtjTJawhG6MMVnCEroxxmSJ/w96zVGLBB+ZHQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the reward value against the policy iterate for the three methods.\n",
    "\n",
    "# Create the figure explicitly. The bare attribute plt.figure (without the\n",
    "# call) only binds the function object and never opens a figure.\n",
    "f_value = plt.figure()\n",
    "\n",
    "# Plot only every num_every-th iterate.\n",
    "num_every = 20\n",
    "num_grads = np.arange(0, total_iterates, num_every)\n",
    "\n",
    "# Convert the recorded values to NumPy arrays before the strided slicing.\n",
    "reward_value_primal = np.array(reward_value_primal)\n",
    "reward_value_reg = np.array(reward_value_reg)\n",
    "reward_value_opt = np.array(reward_value_opt)\n",
    "\n",
    "# Line styles: primal = black dash-dot, reg = blue dashed, opt = red solid.\n",
    "plt.plot(num_grads,reward_value_primal[::num_every], \"k-.\", linewidth=2)\n",
    "plt.plot(num_grads,reward_value_reg[::num_every], \"b--\",linewidth=2)\n",
    "plt.plot(num_grads,reward_value_opt[::num_every], \"r-\",linewidth=2)\n",
    "plt.grid(axis='y', color='0.85')\n",
    "plt.draw()\n",
    "\n",
    "# plt.gcf() returns the figure created above; save it at 300 dpi.\n",
    "get_f_value = plt.gcf()\n",
    "get_f_value.savefig('NPG_primal_comparison_reward.png',dpi=300)\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD4CAYAAADvsV2wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deZwUxRnw8V/t7MXusvcux3KJIIioIAgSEE9EwQPFRPGMBo944BkDLy+ixBD19TYGY4wRTQJoPEAUETACEkVBAUEOF5D72Ptkj5l53j9qZg/Ye5cd6Hm+n09/Zrq7prpqeveZ6urqbiMiKKWUcr6QQBdAKaVU69CAr5RSQUIDvlJKBQkN+EopFSQ04CulVJAIDXQBapOcnCzdunULdDGUUuq4snr16kwRSalp3TEb8Lt168aqVasCXQyllDquGGN21LZOu3SUUipIaMBXSqkgoQFfKaWChAZ8pZQKEi0S8I0xbxhjDhpj1tey3hhjXjLGpBtj1hljzmiJ7SqllGq4lmrhvwlcXMf6S4Cevul2YEYLbVcppVQDtUjAF5FlQHYdSa4A3hLrayDeGNOhJbatlFKqYVprHH4asKvK/G7fsn1VExljbsceAZCWlsaOHbUOJ2207dtDeeaZeO67L4+TTipvsXyVUup4cUxdeCUirwGvAQwcOFC6du3aYnnfcw98/DGUl0ezcGGLZauUUseN1hqlswfoXGW+k29ZqzntNPs6fHhrblUppY4drRXw5wE3+UbrnAXkici++j7UkkpL7WtERGtuVSmljh0t0qVjjJkFnAskG2N2A1OBMAAReRX4BBgFpAPFwC0tsd3GKCmxr/v3t/aWlVLq2NAiAV9ExtWzXoC7W2JbTVVUZF+ffRamT4fw8ECWRimlWl/QXGnrdle+37s3cOVQSqlACZqA7+/SAdi1q/Z0SinlVEET8GfMgIED7fudOwNbFqWUCoSgCfjJyXDOOfa9tvCVUsEoaAI+QGfflQAa8JVSweiYutL2aHr4YXjhBfteA75SKhgFTcBftAg8Hpg6FX71q0CXRimlWl/QBHz/lbbXXAMnnxzYsiilVCAETR++f1hmZGRgy6GUUoESdAH/7bfh7ru1H18pFXyCJuD7u3Teew/+8hfYvDmw5VFKqdYWNAHf38Lv0cO+6sVXSqlgEzQnbS+4AA4dgp497bx26Silgk3QtPDnz4clS6BbNzuvAV8pFWyCJuD76dW2SqlgFRRdOh4PZGbaIZka8JVSwSooWvj79kH79nDKKdClC5xwgp2UUiqYBEUL3z9CJyIC4uNh27bAlkcppQIhKFr4/jH4epWtUiqYBUXAr+m2CocOQXFxYMqjlFKBEFQBPyLCvt53H0RFwVtvBa5MSinV2oIi4B/epZOYaF91pI5SKpgERcA/vEtHh2YqpYJRUIzSOeMMeP99+1xbqAz4ej8dpVQwCYqA3749XHll5XyXLvZVW/hKqWASFF06h/O38HfvBq83sGVRSqnWEhQB/5tv4Ikn7HNtwY7QSUyEsjLIyAhs2ZRSqrUERcD/3/9gyhR7x0y/l1+GDz6AmJjAlUsppVpTUPTh13Th1XXXBaYsSikVKEHRwtdbKyilVJC18P1X2gKsXw/z5kHv3nDVVYEpl1JKtaagaOHX1KWzdi1Mngxz5gSmTEop1dqCIuD7u3SqtvD1alulVLAJioDfpo29yrZt28plGvCVUsHGiEigy1CjgQMHyqpVq45a/mVltovHGHsEEBoUZzOUUk5njFktIgNrWhcULfyahIfbWy54vbB3b6BLo5RSR1/QBnzQbh2lVHAJioD/q1/ZG6Z9+WX15T16QPfulaN4lFLKyVok4BtjLjbGbDbGpBtjJtaw/tfGmAxjzBrfNL4ltttQ+/bZVvzhpyv+9S/YuhUuuKA1S6OUUoHR7FOVxhgX8AowAtgNfGuMmSciPx6WdI6I3NPc
7TVFTRdeKaVUsGmJFv4gIF1EtolIGTAbuKIF8m0xNV14VZXb3XplUUqpQGmJwYhpQNXTnruBwTWkG2uMGQ5sAR4QkSNOlRpjbgduB0hLS2PHjh0tUDwoKuoIhJGZuYcdOyqj+6ZNYVx3XTvS0tx89NH+FtmWUkodq1pr9PlHwCwRKTXG3AHMBM4/PJGIvAa8BnYcfteuXVtk4/4W/IknplE1y7AwyM4Gl8tFS21LKaWOVS3RpbMH6FxlvpNvWQURyRIR3w0OeB0Y0ALbbbCabq0A0K6dveAqI0NH6iilnK8lWvjfAj2NMSdgA/21QLW7zRtjOojIPt/s5cDGFthugz30EOTmQmxs9eUuF6SlwY4d9nGHPXq0ZqmUUqp1NTvgi4jbGHMPsBBwAW+IyAZjzDRglYjMAyYYYy4H3EA28OvmbrcxHnmk9nWdO9uAv2uXBnyllLO1SB++iHwCfHLYskervJ8ETGqJbbU0/9W2O3cGthxKKXW0Of5KW7cb5s6tfID54fwBf/fu1iuTUkoFguPvEVlUBGPG2P77vLwj148ZY2+7MHRo65dNKaVak+MDfn3Psx0yxE5KKeV0ju/S0dsqKKWUFTQBv67bKrz1Fjz7bOuVSSmlAiFounRqa+GHhMD48VBeDnfdZR+HqJRSThT0LfyQEHvxFehIHaWUszk+4NfXwgcdmqmUCg6O79IZNMg+s9aY2tN06mRfNeArpZzM8QE/PBw6dKg7jT/g67NtlVJO5vgunYbQFr5SKhg4PuAvWQKXXQYvvlh7ms6dbR9/eXnrlUsppVqb47t0fv4Z5s+HlJTa01x+ORw6VHc/v1JKHe8cH/DrG5YJ9r74SinldI7v0mnIsEyllAoGjg/4DWnhA1x6KSQnw9atR79MSikVCI4P+A1t4efmQlaWDs1USjmX4wN+Q1v4OjRTKeV0jg/4vXvD6NHQsWMRzz77LHk1PQUFvb2CUsr5HB/wb7nFDsv87rvJPPzwwwwfPrzGdNrCV0o5neMDvt/w4cOJjY3l+uuvr3G93l5BKeV0jh+Hn5FhH3IyevRV5OVdVWs67dJRSjmd41v4d9wBHTvCxx/Xne7EE2HSJJgwoXXKpZRSrc3xLXz/KJ2lSz/jq68W0b59e8rKypg0aVK1dElJMH16AAqolFKtJGgC/ooVS1i9+hkAjDHceuuttGvXLoAlU0qp1uX4Lh3/hVelpfkAxMXFISJ8XEMfz3ff2Qea79zZmiVUSqnW4fiA72/hl5TkAjBu3DgAPvrooyPSPvUU3HwzrFjRasVTSqlW4/iA72/hHzpkA/4111wDwGeffUaJ/9fAR4dmKqWczPEB3x/Ti4uzATj55JPp168fxcXFfPHFF9XSNvXiKxH44x/h739vZmGVUuoocnzAf/11+PBDOHTI3gYzJiaGyy67DDiyW6epY/HLyuDJJ2H8+MofGKWUOtY4PuCfey6MHu2mpCQLYwxRUVHVAr6IVKRtapdORAR07Wrfr1nTAoVWSqmjwPEBH6CoqAiwrXtjDAMGDKB9+/bs2rWLdevWVaRrzv10Bg+2rytXNre0Sil1dDg+4E+ZAn/4gwEMMTExAISEhDB69GigerdOhw72cYf5+fZ2DA3x3nt2dE+3bna+NQP+oUOHKPWflVZKqXo4OuB7vfDEE/Dcc20BqQj4QEW3zqK5c+Hbb0EElwsOHIDCQght4CVpTz8NEyfaHwk4OgF/5cqV3HzzzVx88cUMGDCArl27Eh0dTVRUFO3btyc9Pb3ePIqLixk5ciSDBg3ikUceYcGCBRQUFLR8YZVSxy4ROSanAQMGSHMVF4uASESERwDp379/xbrCwkKJiIiQ5+wgG5Gnnmp0/uvW2Y/GxYnk54tERdn5gwcbX9a33hL55S9Fhg4V+f776utWrlwpUVFRAlSbjDECyOjRo+vNf8qUKUd83uVyyeTJkyvS5ObmytatW+XQoUONr4BDeDyeivfl5eWybt26Gqeff/5ZSkpK
AljS2pWUlMhPP/0kxcXFgS6KCgBgldQSVwMe2GubWiLgZ2fbGsbElAkgw4cPr7b+kksukR/Cw22isLAjI2097rvPfvSuu+z8OefY+fnz6/7cwYMiY8eKpKdXLrvjDvtZELnuuiM/s3z5cvn444/lm2++kW3btkl+fr7s27dP2rZtK4C8884CufdekRtuEBk1SmTiRBGv1342PT1dIiIiBJBXXnlFJk6cKIMHDxaXyyUzZsyo2MY///lPAeTSS++WSZNE3G6RnJwceeKJJ+TDDz+U9PT0agGxPrm5DU5ap4ICkUcfFVmxovGfLSsrk+3bt8vSpUvln//8pzz99NPyu9/9Tm655RZ56623KtJ9+eWXEhERUe1vpLCw8IgfycOnhIQE6dOnj5x//vlyww03yBtvvCFe/xdfiy1btsj9998vgwcPlsWLFze+UlXKt2DBAnnqqafktttuk/PPP1+6dOlS0RA47bTTJLeldsIx7t1335UrrrhC3nnnnYplOTk5snz58lbZvtfrlT179siyZcskKyurYvnbb78to0ePlssuu0yys7NbpSxBG/D37bM1TE31yNKlS+Wbb76ptj73wAHxhoVVRtpTTpG/v3JITj9d5C9/qTvvkhKRxET7sdWr7bLVq0U2b64MtLV58kn7uVdeqVz25Zcizz3nP2Lwyv/9v9Nk1qxZdeazZ4/Is88+K4CceOLpFdXwT19/bdONHj1aALnxxhurfT4vL0/y8/Mr5l966R2JifmHhISUC4i8/bbI0qVLqwW4qKgo6d+/v4wYMULGjh0rt956qzzwwAMydepUefnll2XZsmVSUFAgO3eKdO8uMnVq3d9Ffbxee+QDIm3bivz0U83p9u/fL08//bRMmDBBrrrqKhk0aJB06NChIvjVNN12220Vn1+1apUAUvXvzuPxyKmnnip9+/atNp1yyimSlpYmLpfriDzPOuusw8pf/Y/htttuO+Ioq+qPbkMtXbpUwsPDa6xXSEhIxRHhiBEjpKysrNH5++u/bt06efPNN2XChAkydOhQmTRpUpPyaq78/Hx57733ZOrUqTJ27Fjp1auXfPrppxXr//SnPwkg999/v4iIHDx4UHr27CkxMTGyYcOGFi3L3r17Zc6cOfLYY4/JuHHjZMCAARUNL0DmV2nxPfbYYxIbGysffPBBi5ahLkc94AMXA5uBdGBiDesjgDm+9SuBbvXl2RIBf/t2W8OuXWtJsHq1CIj3hBNEevUSAVl19v0CIhMm1PKZkhKRggKZPdvmXaWX6Eher43kq1dX+xUYPtx+tqYflb59/QF7hLRp00b27dtXY9abN9t0F13kkV69evta5otl5kyRcePsuttuE5k3b54AEhsbW2teWVn2iMDfJWWMyPXX2yOQjRs3ykMPPSQjR46UtLS0elu8gIwfP026d7d5nXZamXz44ecydWqGfPVVzV9TeXm5ZGdny86dO2XDhg2ybt062bBhg2zatEkmTsyo9iM2cqTIZ599JmeeeaY8+OCDFXls3Lix1uCXlpYmZ511lvzyl7+U+++/X554Yro8//yb8rX/F1FE3G63HDp0qN7WeVUej0cOHDgga9eulU8//VReeeUV+eijjyrWr1mzRk488UR58cUXK5Y99dRTEhkZKbfeeqvcd999FeW89957pby8vMbt7Ny5U+6880659957K5bl5ORIWFiYnHnmmXLffffJyy+/LAsWLJAtW7ZIaWmpbNu2TVJTUwWQW265pcH1Ki8vl88//1zuvvtu6dix4xHf57BhwyrSrl+/vsk/Jg3hdrtl0aJFcsMNN9TYpfn0009XpP3xxx9l9uzZku47bPZ6vXLttdcKID169JCcnJwmlyMzM1P+85//yF133SUnn3xyrX/3SUlJctZZZ8miRYuqlWvu3LmSl5fX9C+ikY5qwAdcwFagOxAOrAX6HJbmLuBV3/trgTn15dsSAX/TJlvDXr1qSfC3v4mAfNe7t1yRliYel0sE
5HwWyxVXuKWoqEi2bt0qy5cvl9mzZ8s7d94pOTExUhoRIc/1+ouAV/7851ryPnBA5LLLKiPViSeKTJokecvWiCvEKy5XzV0eU6b4f0i+kg8//NAuLCgQWbtWZNs2+yGvV6ZNs+luvllk4cKFAkh0dLTs3r1bNm70t4i90rVrHwHk+eefr7GYP/8skpJSWcwxY0R++MGuKyy0LfRzzhHx9+RkZ2fL119/LZ9++qnMmTNH/va3v8kzzzwjU6ZMkVtvvVX69LlEUlMLBEQGDhR58cWZAr8TEBk2zP7ufffdd5KYmCgpKSkSGRlZx4/HeQJuAZERI/4pN91kj2oWLVokgAwdOrSiHoWFhfLAAw/Ic08/LR+++qqs+ve/ZeeGDVJWViZer8hLL9n6iIg88YRI586VR0CNNW2qW2660SszZtjd4nbXnG7y5MkCyN13312xLC8vTzIzMyvmZ86cWdFSv+iikbJhQ57s3Fk9H//RR2pqarUutapHZ1V5PCL//a/I3LlrpE2bNgLUeRThdrvl008/lfHjx0tycnK1fZCWliZXX321TJ8+XRYuXCgHfSeopk+fLiEhIfLCCy/U8201jsfjkfXr18vEiROlU6dO1coydOhQeeSRR+Ttt9+W77//vt5zTUVFRdKvXz8B5JJLLhF3bTuqDnPmzDniKDE6OlpGjhwpv//97+WNN96QFStWVNundVm7dq0U+v8Qj5K6Ar6x65vOGDMEeExERvrmJwGIyJ+qpFnoS/OVMSYU2A+kSB0bHzhwoKxatapZZdu0CS66CJKSsjnvvCe48MILGTVqVGWCu+6CGTN4vVcvbtu8mY3XXUfvf/+b3aRxSadFrN/dB7D3kJ4G/J7qw5pWJ5xLjy/eJO60rhXLHn8csv+1gGdzbiE08wDExUFkpB3+47OZk1jZ5Vfc9Fw/SEuDtDQ8KSkUlpaydWscIwZkcUXil/z95mWYL5fb23h6PBWfF5eLXOLJ8CSSfEo7Eof24a3Vq3GdeipjH32UyG7dGDoU9n71MwO4mYuTN3Jr//6ErFljby6UnGwfAJCczLo9SSxZl0ybtCRG3pDMCQOSKtaXR8Uxdshe4jLTefS6dHqarZCeDnv3Qnw8pKZWTikpZJHEU8+Hsy8rjC7dw/g/U8P4ds0Kliz4gkPbbiCpLIMbLthHVO569qxeTVvf9xkCuIwh1BhcxpAVGsqm8Db8r+ge1niHsj1mNdc/7ObRKVNg926Kf/iBXV98QZfyctrs32+/24MH7Wt2tv3t8ktI4GCbrvxvb1fKO3Tlqge68Ie/dWDpTx3IDO3AvdM7cNtDsZgQA8XFsG+frd/evfb9vn2wf3+1STIzcXtDyCSZDFLIdqUgySm06ZJCr2GpJPRuB+3a4UlOZkV6Oh1OPZWeHTtCbi7k5FRMxbuz2bYygwM/bCd/6w6SpYxUcoihEE9kKJ37pGDiYqFtW37cvZvUnj1J6tULk5ICKSl2PyUnQ1gYFBdTlFHM4nnFfPZhMXn7i0kJzWFor82E5n/C6MEDCMvPt9sOCbFXC0ZGsuvgQdanp5NRWEgekA2EJifTse8F7Mi/jE170xjYz8PI88s55aRyQjzlUF7Omq++YuZLL3HpsGFcMGCArVturr3U3OWyU2hoxWtuaRtcCbHEpMXZOsXFURYZSXhUFHi9lJaUMO2xx9izaxdlpaXEAG2BLvHxnNm7N327diXW5arct8ZUvoaEQHQ0xMTYyf8+MpKMrCwemzqVgqIiLh09ml9dc40dc11UZPe3fyoqwlNSwo4dO8DrpXu3biBCfl4e786dS0LHjrQ74QQ6n3QSaSeeiCsmxtYtJKT6ZIz9HyspqT6VlrLu++9ZvmwZ/U8/nV8MGmSHERoD4eF2f/gn//w999j3jWSMWS0iA2tcWdsvQUMn4Grg9SrzNwJ/PizNeqBTlfmtQHJd+bZEC99v2rRpAlQbkSIiImedJQJSvnCh/PDDD5KXlSUl/e2yOa6xEhoa
KsM6dJD10dEiIB5jZNl558l9qamS4WsSl0REiOfVV23TtbhY5p94b2Vz+ZxzRHbutE3Azz8XufNOyYus0pyuMnlADrpcUtqly5HrXS57mNKli0hMTI2frzYlJEhZTEL96Y6nKT5exH+Cva7JGHvI0r27SGRkg/IucbURb2xs4Ouok05VpyaOsuIot/CvBi4WkfG++RuBwSJyT5U0631pdvvmt/rSZB6W1+3A7QBpaWkDVrTQfYq/++47Vq9eTb9+/TjzzDPtQrebzn37ElJSwq41a/DGx9vl6T+TfOEooikm79rraDt/HiGFhbg7diTzhRcoHTSIwsJCXpo8mYvmzmWsbxu5gwYRlZtL+JYtlBPKe/2mMOS9G2wrwMfrhSFntqdv1nLeGDOb5MIduA4epGz7dqIKCvCnlPBwSvv1o2TwYEoHDaK0f3+kyjUEzz0ZxaxXhZtG7+J34zYRtmULYT/9RNiWLYRv2UKIb3y9OymJve3bE3feeZT27UtZ375I27aEZGcTkpODKyeHkOxs+5qTQ0hurl2enW3f5+dTlpTKx5t6s9nTkzEPJpF8Vic8HTpg8vNxZWXhysoiJCuLvJ9y+PqTEqLCyhg+pIBwyjHl5bY1FRpKaVIqr8/vweaCNC4dH8WgMXFIbCxyWAtJgNC9ewnftImwTZsI37iRsE2bcOXlAeBJTmZHWHe+3NeLQx26MubBROjYDk9SEp7kZLwJCRXf+YH9Ifx6lIuY7N3cNmIjV5+5BdfevbgyMnBlZFCyPYPQjIPEYK/E9oSGI+1ScLdrhyc1FU+7dnhSUvCkpPL6x92hfQrXP9QGb1IieDz2e8rKomBbLru/z2PzikLiSzO4bNBOXFmZdjuZmYTk5CDR0Xjj4iiNjMWVHIs3Lg532zjmfd2FyC4JxJ+UQHzPcF59fwZfrDKcfcb5PPLbHiSFhWEKCwkpKiIkL8/un6wsJCOHsn25xJZmYtxuyiOi+H5zPK62kbTrFkZy13AOlCSwcHVHeg+N4vRzoyiNieH/PPMMo0eNYvjgwZjSUvbv2MHurVvpkXYqCeQTXpRHSF4e6d8WY7Lz6BSfR3ZBOPsyIiksC6ecMAYM9pCYFoY3NpZ9xfFIXCyJ3WMgIZYyE8HXK8JY9nk4E+7OIi7GjfF4+MdfwijYXYSruJA48oglnzjyCMFLWicvp5zmJj8/n4ioKCIiIylxRbE9M54NuxLYuCuBHE8sA84O4cqrD2FE2LYtlJdeigMgFDdRFBNDIYlhBfTunMewfllEUoLHY9iaHsqePUXk5nqBODyEU0Q0xUQRnbyaceMHI5GRvDsvntXftcFLCIIhMclLjx5uYiNLaRdXSP9e+ZjSUkpzSvn6v5CbA4eKIAQvIXhx4cEgjLzCQ3RiGBIRwadfJPD9plhC24QSERVCcWkOeYWZtIkynHtBT35x1iFMeTk7tnj5z7/DiaCUCEr5zQ1ZFE2bbP8vGqlbt25HtYU/BFhYZX4SMOmwNAuBIb73oUAm2B+b2qaWbOHXaP16+yvardsRqx6O+2v1X9qrr7ZjPEVk4UI7bPKrr0Q+eP99Gd+2rWRWSVvUuZecwaoaT+ZmZopceqnIySdXnsP9+OOPJSQkRFwg7zz/vB3c7xvf7fUeOeLH67WnA0BkyZIjtzF71iw5JT5e3n7yyYoP13ReLS+v+rDQutx1l93eb35Td7rc3LqHYr75ps2na9eKKtaY5j//OWyh1yuyd689lyH2e+zUyeY1cWLN+ZSWivziFzbN+eeL1HI+VDZuFBnYu0ASyZRLRx95YtPrFXnoIZtPmzYiW7bUXj+vV6SoqHJ+715bVhG7/UcftQcpX35Zex4icsTQ1zfesCO7yspEPvnEnlCPjrYHMlX37d69R+ZVVlZ5/sUe6U6SNm22S/fuXklLE0lKsnkZI1JXd7zbLbJ4sci991bmJyJyzjleARGXq1QGDfJKamrlv80f/2jP+Tz88MPSvXt3KS4uloIC+yc+
efK3MmlSrpx3nq2T39tv279v3+k0AVu24cNF5s2rTFdYaPP5+WeRNWtEHn9c5PTTbfrExMr9XV5e04HhboGFAs9LRMSwiuspNm0SefFFkSuusNfWVP3MqFGV2/aP/gP73Q0fbv9GZs+2I7urfj833lg5GOLw6fTTt1Wky84W6dzZIyNGFMrjj9uBFE3FUT5pGwpsA06g8qTtKYeluZvqJ23fqS/flgj4n3wikpxsT2we4a23bPWvuuqIVY9O8cr3fa4TT9tYe2K3StS98kr7sT/9yc7v379fnn7wQfHecovIQw9Jwf5CMcYjLpe32j9/Vf4/iDVr1khMTIwAMmXKlGpppk61PTjr11f/bHq6SESESLt2NZ8snDNnjqSmpsrMmTMlP1/kmmtsgD086E+fLhISUlmPuvhHBEVE2HPRh69rKLdb5JRTbF5vvFF93Y8/2rIaY9f7h7rWZtkyW36w/1R//GP1wDFhgl2XlnZkmQ9XUmK/j6pDPnfutMF7+nSbT2ho9fzrU1Jiewy7dbPXZZx9dmXwquX8eY3277c/NHBk4BgyRI44wVuXn38ultDQ8hqDj8tlrytprJtuKhOXa3u1vPr1E3nrLbe8/PKrkpSUJGBPdlYMQqjD9ddXlmfECJFXX7XfQUNt3SqyYEH1ZdOni/z1ryJLlpTIOedcIWlpafLrX/9aZs+eXW3MfFVut8i334o8+6w9yT9nTuW6khKRd9+1/5sNOQ/s8dgf4/Xr7d/tww8vF/i1REdfJfv375cDBw7Io48+KsnJyTJ48OBGjRSryVEN+DZ/RgFbsH3zk33LpgGX+95HAu9ih2V+A3SvL8+WCPhz5tgann32fvn3v/8te/bsqVx5//125RNP1Pxhr/eIvblggf1IWFjNrSkRO0YX1gqIfPxx7UOxdu/eXTHM8brrrjtiJ99yi93WH/5w5Gfz8kRWrqyt2F6544475Le//a14vSK9e9t8qv6vFRdLRUvss89qLWI1l15q01cZYShffWW/izvuqN6qqcuiRSIzZ1Z+tZs22SMmf6APDxeZNq1hefmDsX+64AK73OOx319YmNQ6FLQuXq+94tn/HRkjUs8lEUc4cMCOUqpavg4daj4qq8+8efYUBtj9+Yc/2EaZOaoAABI4SURBVMDWFNu22VFYP/0ksmuXSEaGvUq8OaMrZ86cKRAviYljZfHiQlm8eImceuqpFYH+3HPPlVWrVjUor5ISO3qqOS3cY53X65WLL75YAOndu3fFRZGADBw4sNkXaB31gH80ppYI+P5GfPv2iwWodqFGxWD4BjbbDh60reqqrfuabNu2TU48cZGAyDPP2GX79++Xdu1Olu7db5cLLrhErr/++orxvMOGDatxeNncuXZbzf0a/t//s/lcdlnlsldeqcy7oY2J1avtj4M//cGDld0qtV6zUAePxx55+VvpYWEid97ZuBariA0Or74q8uCD1a9r8HrtUUNTHDhQ2T0AIk24LkpERA4dEhk/Xiq6BJpyyw2//fttoG5m4++o8Hg8MnjwYAE75t0fvLp16ybvvfdes1usTrR169ZqQ5Ivv/xyWbp0aYt8V0Eb8F97zdYwNXWuAPKlv/PU47GXbUKNx4uZmbYl5r8w1+sVufxym3z48PoP4+bPF5kwwVvRV7t27VqBG30B5MOKndyjRw/JyMioMY/iYts/CCI7dthlOTmNb4kdOGC7I1wue1RSXm67GaCGvvIGcrvt4ba/W6G0tGn5jBtny3b77bYv9ljidtv+5Pfea35eGRnHZqBuSd98803F33V0dLRMnz49qO/J1BDz58+XiRMnyqZNm1o036AN+C+/LL6TOLMEkLVr19oVW7bYFR071vi5WbPs6rFj7fxffedw4+Iqg29jlJeXy2WXFQmI3HnnBpk5c6bMmDFDDtTTuTx2rN3uSy/Z+XvusSfZGhuo/ecdnnzSBjEQOemkhvU/1uSmm2weKSm2W6CpduywV0MrZ3j11Vfl/vvv
r951qlpdXQG/gTcBPj75HzfodhcCVN4e+bvv7OsZZ9T4ucMfhCJir4OYMQO6dGlKSUJZvtx+1Q891IcePfo06FNXXmnvt//BB/YasXffhaysynvvN9RvfmPzeP11Ww+A3/++2ojRBrvmGnjnHft+1qzK76opmvZdqmPVHXfcEegiqHo4+n74/meDuN12XHpDA/7hz7a94w57gem4cQ3f9rZt8MYb8P338NVX9iLEk06CHj0anseoUfZixWXL4P337YWkPXrUWuxajRwJHTvaOojYxzHecEPj8vAbMsS+PvUUXHBB0/JQSgWGowP+OefAtGlQXr4QaHjA79DBXvG8Z0/lk68a25J9/XXbsn7nHfjkE7us6l0dGiIhAV54ARYvhk8/tcuuvbbyqvKGCg2FmTNh61bYsAF+/LFJV2wDcN999g4DjzzStM8rpQKn2VfaHi0tcS8dALfbTVhYGCEhIbjdbgzY+5BkZcGOHbX2K/iD6vTpMGlS47c7dy6MGQPnnWc3tW4dLFoEF17Y+LzKy6F9e3ubmB9+gL59G5+HUio41HUvHUe38OHIB5iza5eNwElJlX03NTj1VPu6fXvTtut/qPkXX9jHH0ZHw9lnNy2vxYttsD/lFA32Sqmmc/RJ22+/hXXr3EBnYmJ8d5us2p1TR9/I3/8OK1bA3Xc3bdvt29uDh507Yd48+/viP2HaWMuX29fzz2/a55VSChwe8P/8Z3jrrSTgfGJivrIL6+m/9zvzTDs1x+DBNuCvXAnjxzc9n1Gj7F16H3+8eeVRSgU3R3fp+IdlQkmDT9i2JP8mVq5sXj7DhsE//mFP4iqlVFM5OuD7h2VCaUAC/vDh9vX774/6ppRSql6O7tLxt/B/+9tbOe+8ksonGMXGQvfuR337v/iFHZLZq9dR35RSStXL0QHf38K/+urL7AlP/4D4/v2b9GCBprjkklbZjFJK1cvRXTr+Fn5kpG9BK3bnKKXUscbRLXx/wP/ss3lERXWhnwZ8pVQQc3TAX7oUnnjiOR5/fCJu9yOVAb9//8AWTCmlAsDRXTqxsfCLX3Tn5puvY8hJJ9lbKbRpo2dRlVJBydEtfIAxY8YwZswYWLLELjj9dHs3MaWUCjKObuFfeSWMHg0FBcCaNXahducopYKUo5u6ixZBURFs3Pgj/XbtIhz0qRtKqaDl6Ba+f5TO4MGncXDjRjuTlBS4AimlVAA5NuC73eDxAHgAD20OHbIrEhMDWCqllAocxwZ8/1W2xtg3Eb774msLXykVrIIm4IcX2OfaagtfKRWsHBvwK2+NbAO+Ky/PzmoLXykVpBwb8MPC4OqrwZgFAITk5toV2sJXSgUpxwb8lBSYNcuNx3MrbY3BlJXZq2zbtAl00ZRSKiAcG/ABCgsLAegcHW0XaOteKRXEHBvwi4thw4ZDQDJp/la9BnylVBBzbMBfuRKGDesAvEuHiAi7UE/YKqWCmGMDftXn2XYIC7NvtYWvlApijg34lcMyS0hxuexbbeErpYKYYwO+v4XfvXsnBvXoYWe0ha+UCmKODfj+Fv7QoQM4u08fO6MtfKVUEHNswPe38CMjgexsO6MtfKVUEHPs/fD9Lfy8vAOU5u4lArSFr5QKao5t4V95Jdx00794553z2P/jj3ahtvCVUkHMsQG/c2cYMqSAM85oQ6zbbRdqC18pFcSaFfCNMYnGmEXGmJ98rwm1pPMYY9b4pnnN2WZj3HnnnaxevZoEr9cu0Ba+UiqINbeFPxFYIiI9gSW++ZocEpF+vunyZm6zQebNg0mT4H8rRE/aKqUUzQ/4VwAzfe9nAmOamV+L+ewzePJJWLci3z7vMDoa/LdYUEqpINTcUTrtRGSf7/1+oF0t6SKNMasAN/CkiHxYUyJjzO3A7QBpaWns2LGjyQXLykoE2vLU7+/kTsAdF8eeZuSnlFLHu3oDvjFmMdC+hlWTq86IiBhjpJZsuorIHmNMd+BzY8wPIrL18EQi8hrwGsDAgQOl
a9eu9VagNqG+miWSb+dTU2lOfkopdbyrN+CLyIW1rTPGHDDGdBCRfcaYDsDBWvLY43vdZoz5AugPHBHwW5L/wqtECn1vtP9eKRXcmtuHPw+42ff+ZmDu4QmMMQnGmAjf+2RgKPBjM7dbL/+FV0kU+d7okEylVHBrbsB/EhhhjPkJuNA3jzFmoDHmdV+ak4FVxpi1wH+xffitFvAT/QFfW/hKqSDXrJO2IpIFXFDD8lXAeN/7/wGnNmc7TZGaCrBDW/hKKeXj2Ctt33zTDXQjiV12gbbwlVJBzrEB3/8A83b+p11pC18pFeQcH/BTQnxV1Ba+UirIOTbgn3deCpBJIr6razXgK6WCnGMDfk6OAZJIFL1TplJKgYMDflmZASDeW24XaAtfKRXkHBzwQzB4ifX4Wvga8JVSQc6RAV8EystdxJJPiAi0bQv+0TpKKRWkHBnw3W7weqGdK8su0P57pZRyZsD331ahfbgv4Gt3jlJKOTPgh4XBRRctos2hJ+0CbeErpZQzA35kJAwevJyUEN/NO7WFr5RSzX7i1TFr2rRpkJICEyZoC18ppXBoCz8nB2bNgq2r9OHlSinl58iAv20bXHcdfLNAR+kopZSfIwO+f5SOyfY9RVFb+Eop5cyA73+ebZwnw77RgK+UUs4M+JWPN8yzb7RLRymlnB7wC3xvtIWvlFKODPj+Lp0k8n1vtIWvlFKODPglJRCCh3iK7YL4+MAWSCmljgGODPjXXltOPPGEIEh8PIQ69voypZRqMEcG/JKSQhKxz7Q12n+vlFKAQwN+YWEhFb322n+vlFKAQwP+zJkuEpluZ7SFr5RSgEMD/tatkEQnO6MtfKWUAhwa8AsLPSSiN05TSqmqHBnwS0qEJPRpV0opVZUjA35qapfKFr526SilFODQgF9SgnbpKKXUYRwZ8EtLqezS0Ra+UkoBDg34bvdSkkO22xlt4SulFODQgH/SSR+T4P3JzmgLXymlAIcG/EmTJtG1bVs7oy18pZQCHBrwsw+2xVVQgBijd8pUSikfRwb8W6/MAcATmwAhjqyiUko1miOjYenedPsaHRvgkiil1LHDkQE/osg+vLw0OibAJVFKqWNHswK+MeaXxpgNxhivMWZgHekuNsZsNsakG2MmNmebDZHgtc+ylcSEo70ppZQ6bjS3hb8euApYVlsCY4wLeAW4BOgDjDPG9GnmdusU77XPsjXJyUdzM0opdVxp1rP/RGQjgDGmrmSDgHQR2eZLOxu4AvixOduuSwK2hR+WqgFfKaX8WuNhr2nArirzu4HBNSU0xtwO3A6QlpbGjh07Gr0xEUjCtvBLoiPJbkIeSinlRPUGfGPMYqB9Dasmi8jcliyMiLwGvAYwcOBA6dq1a6PzKC0tI5G/A5B8Uk9ME/JQSiknqjfgi8iFzdzGHqBzlflOvmVHRXFxEUnsB7QPXymlqmqNYZnfAj2NMScYY8KBa4F5R2tjBQUFVNxMQW+roJRSFZo7LPNKY8xuYAjwsTFmoW95R2PMJwAi4gbuARYCG4F3RGRD84pdu927D5FEmp3RG6cppVSF5o7S+QD4oIble4FRVeY/AT5pzrYaau/eUgb6q6UtfKWUquC4K21zcw/pw0+UUqoGjgv4BVmFtKUQNy6I1XvpKKWUn+MC/oATegCQH5oIdV8QppRSQcVxAT+soAiAwnDtv1dKqaocF/C9mdkAFGnAV0qpahwX8GNK7Qnb8jg9YauUUlU5LuCf3tcD7dpx2oXtAl0UpZQ6prTGzdNa11VX2UkppVQ1jmvhK6WUqpkGfKWUChIa8JVSKkhowFdKqSChAV8ppYKEBnyllAoSGvCVUipIaMBXSqkgYUQk0GWokTEmA9jRjCySgcwWKs6xLFjqCcFT12CpJwRPXVuznl1FJKWmFcdswG8uY8wqERkY6HIcbcFSTwieugZLPSF46nqs1FO7dJRSKkhowFdKqSDh5ID/WqAL0EqCpZ4QPHUNlnpC8NT1mKin
Y/vwlVJKVefkFr5SSqkqNOArpVSQcFzAN8ZcbIzZbIxJN8ZMDHR5mssY09kY819jzI/GmA3GmPt8yxONMYuMMT/5XhN8y40x5iVf/dcZY84IbA0axxjjMsZ8b4yZ75s/wRiz0lefOcaYcN/yCN98um99t0CWu7GMMfHGmP8YYzYZYzYaY4Y4cZ8aYx7w/d2uN8bMMsZEOmWfGmPeMMYcNMasr7Ks0fvQGHOzL/1Pxpibj2aZHRXwjTEu4BXgEqAPMM4Y0yewpWo2N/CQiPQBzgLu9tVpIrBERHoCS3zzYOve0zfdDsxo/SI3y33AxirzTwHPi0gPIAf4jW/5b4Ac3/LnfemOJy8Cn4pIb+B0bJ0dtU+NMWnABGCgiPQFXMC1OGefvglcfNiyRu1DY0wiMBUYDAwCpvp/JI4KEXHMBAwBFlaZnwRMCnS5WriOc4ERwGagg29ZB2Cz7/1fgXFV0lekO9YnoJPvn+R8YD5gsFcnhh6+f4GFwBDf+1BfOhPoOjSwnnHA9sPL67R9CqQBu4BE3z6aD4x00j4FugHrm7oPgXHAX6ssr5aupSdHtfCp/APz2+1b5gi+Q9z+wEqgnYjs863aD/if2n48fwcvAI8AXt98EpArIm7ffNW6VNTTtz7Pl/54cAKQAfzD1331ujEmGoftUxHZAzwD7AT2YffRapy5T/0auw9bdd86LeA7ljEmBngPuF9E8quuE9s0OK7H1xpjLgUOisjqQJelFYQCZwAzRKQ/UETloT/gmH2aAFyB/YHrCERzZBeIYx2L+9BpAX8P0LnKfCffsuOaMSYMG+z/JSLv+xYfMMZ08K3vABz0LT9ev4OhwOXGmJ+B2dhunReBeGNMqC9N1bpU1NO3Pg7Ias0CN8NuYLeIrPTN/wf7A+C0fXohsF1EMkSkHHgfu5+duE/9GrsPW3XfOi3gfwv09I0CCMeeIJoX4DI1izHGAH8HNorIc1VWzQP8Z/Rvxvbt+5ff5BsVcBaQV+UQ85glIpNEpJOIdMPut89F5Hrgv8DVvmSH19Nf/6t96Y+p1lRtRGQ/sMsY08u36ALgRxy2T7FdOWcZY6J8f8f+ejpun1bR2H24ELjIGJPgOyK6yLfs6Aj0SY+jcBJlFLAF2ApMDnR5WqA+w7CHheuANb5pFLZvcwnwE7AYSPSlN9iRSluBH7AjJAJej0bW+Vxgvu99d+AbIB14F4jwLY/0zaf71ncPdLkbWcd+wCrffv0QSHDiPgUeBzYB64G3gQin7FNgFvbcRDn2qO03TdmHwK2+OqcDtxzNMuutFZRSKkg4rUtHKaVULTTgK6VUkNCAr5RSQUIDvlJKBQkN+EopFSQ04CulVJDQgK+UUkHi/wOFg1pdaUo5JAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the utility value against the policy iterate for the three methods.\n",
    "\n",
    "# Create the figure explicitly. The bare attribute plt.figure (without the\n",
    "# call) only binds the function object and never opens a figure.\n",
    "g_value = plt.figure()\n",
    "\n",
    "# Plot only every num_every-th iterate.\n",
    "num_every = 20\n",
    "num_grads = np.arange(0, total_iterates, num_every)\n",
    "\n",
    "# Convert the recorded values to NumPy arrays before the strided slicing.\n",
    "utility_value_primal = np.array(utility_value_primal)\n",
    "utility_value_reg = np.array(utility_value_reg)\n",
    "utility_value_opt = np.array(utility_value_opt)\n",
    "\n",
    "# Line styles: primal = black dash-dot, reg = blue dashed, opt = red solid.\n",
    "plt.plot(num_grads,utility_value_primal[::num_every], \"k-.\", linewidth=2)\n",
    "plt.plot(num_grads,utility_value_reg[::num_every], \"b--\", linewidth=2)\n",
    "plt.plot(num_grads,utility_value_opt[::num_every], \"r-\", linewidth=2)\n",
    "plt.grid(axis='y', color='0.85')\n",
    "plt.draw()\n",
    "\n",
    "# plt.gcf() returns the figure created above; save it at 300 dpi.\n",
    "get_g_value = plt.gcf()\n",
    "get_g_value.savefig('NPG_primal_comparison_utility.png',dpi=300)\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
