{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from tensorflow.python.ops.rnn import rnn_cell_impl, _should_cache, nest, vs, tensor_shape, _is_keras_rnn_cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(\"../\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "from matplotlib import pyplot as plt\n",
    "from tensorflow.examples.tutorials.mnist import input_data\n",
    "import os \n",
    "import multiprocessing as mp\n",
    "from qnetwork import *\n",
    "from utils import *\n",
    "import pandas as pd\n",
    "from sklearn.metrics import roc_auc_score, average_precision_score, mean_squared_error\n",
    "rnn = tf.contrib.rnn\n",
    "slim = tf.contrib.slim\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n",
    "import random\n",
    "SEED = 2599\n",
    "np.random.seed(SEED)\n",
    "tf.set_random_seed(SEED)\n",
    "random.seed(SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _read_patient_sequences(csv_path):\n",
    "    \"\"\"Read one sequence CSV and index its rows by the TrainSampleIdx column.\"\"\"\n",
    "    return pd.read_csv(csv_path, index_col=\"TrainSampleIdx\")\n",
    "\n",
    "\n",
    "# Four splits: {shock, non-shock} x {train, test}.\n",
    "df_shock_train = _read_patient_sequences(\"../df_all_shock_train_one_seq_per_patient.csv\")\n",
    "df_shock_test = _read_patient_sequences(\"../df_all_shock_test_one_seq_per_patient.csv\")\n",
    "df_non_shock_train = _read_patient_sequences(\"../df_all_non_shock_train_one_seq_per_patient.csv\")\n",
    "df_non_shock_test = _read_patient_sequences(\"../df_all_non_shock_test_one_seq_per_patient.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The 15 columns kept from every split, in the order they are fed to the model.\n",
    "FEATURE_COLUMNS = [\n",
    "    \"MinutesFromArrival\", \"Temperature\", \"RespiratoryRate\", \"HeartRate\", \"Bands\",\n",
    "    \"Lactate\", \"WBC\", \"Platelet\", \"MAP\", \"SystolicBP\",\n",
    "    \"FIO2\", \"PulseOx\", \"BUN\", \"Creatinine\", \"BiliRubin\",\n",
    "]\n",
    "\n",
    "df_shock_train = df_shock_train[FEATURE_COLUMNS]\n",
    "df_shock_test = df_shock_test[FEATURE_COLUMNS]\n",
    "df_non_shock_train = df_non_shock_train[FEATURE_COLUMNS]\n",
    "df_non_shock_test = df_non_shock_test[FEATURE_COLUMNS]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pool both training cohorts; the per-column mean/std used to standardize every split are read from this frame.\n",
    "all_df = pd.concat([df_shock_train,df_non_shock_train])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standardize each feature with the mean/std of the pooled training frame (all_df),\n",
    "# so the test frames are scaled with training statistics only.\n",
    "# NOTE(review): a constant column would have std == 0 and divide by zero here -- confirm none exists.\n",
    "frames_to_scale = [df_shock_train, df_non_shock_train, df_shock_test, df_non_shock_test]\n",
    "for col in df_shock_train.columns:\n",
    "    # Compute the statistics once per column rather than once per frame.\n",
    "    col_mean = all_df[col].mean()\n",
    "    col_std = all_df[col].std()\n",
    "    for _df in frames_to_scale:\n",
    "        _df[col] = (_df[col] - col_mean) / col_std"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pick a numeric sentinel for missing values that lies outside the observed training range.\n",
    "# np.inf is used instead of the np.infty alias, which NumPy 2.0 removed.\n",
    "_max = -np.inf\n",
    "_min = np.inf\n",
    "for _df in [df_shock_train, df_non_shock_train]:\n",
    "    # Treat NaNs as 0 so they can never decide either extreme.\n",
    "    _df_values = np.where(np.isnan(_df.values), 0., _df.values)\n",
    "    _max = max(_max, np.max(_df_values))\n",
    "    _min = min(_min, np.min(_df_values))\n",
    "\n",
    "# Three times the largest training value: larger than any training entry as long as _max > 0.\n",
    "nan_replacement = 3*_max\n",
    "# nan_replacement = 0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Longest sequence over all four splits: the largest number of rows that share one index value.\n",
    "rows_per_index = [\n",
    "    np.unique(_df.index.values, return_counts=True)[1]\n",
    "    for _df in [df_shock_train, df_non_shock_train, df_shock_test, df_non_shock_test]\n",
    "]\n",
    "max_seq_len = max(np.max(counts) for counts in rows_per_index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Overwrite every remaining NaN with the sentinel, modifying the four frames in place.\n",
    "for _df in (df_shock_train, df_non_shock_train, df_shock_test, df_non_shock_test):\n",
    "    _df.fillna(nan_replacement, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def seq_length(sequence):\n",
    "    \"\"\"Count, for each entry along axis 0, the axis-1 steps holding at least one non-zero value.\n",
    "\n",
    "    Args:\n",
    "        sequence: rank-3 tensor; it is reduced over axis 2 and then over axis 1.\n",
    "            Presumably laid out as [batch, time, features] -- TODO confirm against callers.\n",
    "\n",
    "    Returns:\n",
    "        An int32 tensor with one count per entry along axis 0. The count equals the\n",
    "        sequence length only if all-zero steps occur solely as trailing padding.\n",
    "    \"\"\"\n",
    "    # The largest absolute value of a step is 0 only when the whole step is zero,\n",
    "    # so its sign is a 0/1 indicator of whether the step is used.\n",
    "    step_is_used = tf.sign(tf.reduce_max(tf.abs(sequence), axis=2))\n",
    "    return tf.cast(tf.reduce_sum(step_is_used, axis=1), tf.int32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "HALF_BATCH = 64  # sequences taken from each cohort per training batch\n",
    "\n",
    "\n",
    "def _padded_sequence(df, idx, cache):\n",
    "    \"\"\"Return the rows of df stored under index value idx, zero-padded to max_seq_len rows.\n",
    "\n",
    "    df.loc[idx] is a Series rather than a DataFrame when idx occurs only once;\n",
    "    np.atleast_2d turns that single row into a (1, n_features) array so it is padded\n",
    "    like any other sequence instead of re-using the previous patient's array.\n",
    "    Results are memoised in cache (a dict keyed by idx).\n",
    "    \"\"\"\n",
    "    if idx not in cache:\n",
    "        values = np.atleast_2d(df.loc[idx].values)\n",
    "        padding = np.zeros((max_seq_len - values.shape[0], values.shape[1]))\n",
    "        cache[idx] = np.vstack([values, padding])\n",
    "    return cache[idx]\n",
    "\n",
    "\n",
    "all_outs = []\n",
    "all_labels = []\n",
    "all_masks = []\n",
    "\n",
    "# Every index value is visited HALF_BATCH times below, so each patient is padded only once.\n",
    "shock_cache = {}\n",
    "non_shock_cache = {}\n",
    "\n",
    "# Shock sequences are labelled [0, 1] and non-shock sequences [1, 0].\n",
    "batch_labels = np.asarray([[0., 1.]] * HALF_BATCH + [[1., 0.]] * HALF_BATCH)\n",
    "\n",
    "tiled_ids = np.concatenate([df_shock_train.index.unique() for _ in range(HALF_BATCH)])\n",
    "# One chunk of HALF_BATCH ids per batch; this count was previously hard-coded as 866.\n",
    "for idxs in np.split(tiled_ids, len(tiled_ids) // HALF_BATCH):\n",
    "    # NOTE(review): the non-shock frame is looked up with the *shock* frame's index values,\n",
    "    # which assumes both frames share the same TrainSampleIdx values -- confirm.\n",
    "    outs = [_padded_sequence(df_shock_train, i, shock_cache) for i in idxs]\n",
    "    outs += [_padded_sequence(df_non_shock_train, i, non_shock_cache) for i in idxs]\n",
    "    outs = np.asarray(outs)\n",
    "\n",
    "    # 1 where the padded input holds the missing-value sentinel, 0 elsewhere.\n",
    "    # (plain int replaces the np.int alias, which newer NumPy releases removed)\n",
    "    masks = (outs == nan_replacement).astype(int)\n",
    "\n",
    "    all_outs += [outs]\n",
    "    all_labels += [batch_labels.copy()]\n",
    "    all_masks += [masks]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gen_train():\n",
    "    \"\"\"Yield the pre-built training batches one at a time as (inputs, labels, masks).\n",
    "\n",
    "    The three items are the entries of all_outs, all_labels and all_masks that share\n",
    "    the same position.\n",
    "    \"\"\"\n",
    "    for batch_no, batch_inputs in enumerate(all_outs):\n",
    "        yield batch_inputs, all_labels[batch_no], all_masks[batch_no]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gen_test():\n",
    "    \"\"\"Yield one (sequence, label, mask) triple per test index value: shock first, then non-shock.\n",
    "\n",
    "    Each sequence is zero-padded to max_seq_len rows. The mask is 1 where the padded\n",
    "    sequence equals nan_replacement and 0 elsewhere. Shock sequences are labelled\n",
    "    [0, 1] and non-shock sequences [1, 0].\n",
    "    \"\"\"\n",
    "    def _padded_examples(df, label):\n",
    "        for idx in df.index.unique():\n",
    "            # .loc returns a Series when idx occurs only once. Keep such single-row\n",
    "            # sequences (as one row) instead of silently dropping them, so the number\n",
    "            # of yielded examples always equals the number of unique index values.\n",
    "            values = np.atleast_2d(df.loc[idx].values)\n",
    "            out = np.vstack([values, np.zeros((max_seq_len - values.shape[0], values.shape[1]))])\n",
    "            # plain int replaces the np.int alias, which newer NumPy releases removed\n",
    "            mask = (out == nan_replacement).astype(int)\n",
    "            yield out, np.array(label), mask\n",
    "\n",
    "    for example in _padded_examples(df_shock_test, [0., 1.]):\n",
    "        yield example\n",
    "    for example in _padded_examples(df_non_shock_test, [1., 0.]):\n",
    "        yield example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([1.96707608, 1.50105805, 0.89303454, 0.44501721, 0.19950772,\n",
       "        0.21050814, 0.43001663, 0.91603543, 1.54655981, 1.89157316]),\n",
       " array([2.06842961e-05, 1.00016817e-01, 2.00012949e-01, 3.00009082e-01,\n",
       "        4.00005214e-01, 5.00001347e-01, 5.99997479e-01, 6.99993612e-01,\n",
       "        7.99989744e-01, 8.99985877e-01, 9.99982009e-01]),\n",
       " <a list of 10 Patch objects>)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAD8CAYAAABw1c+bAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEupJREFUeJzt3X+MXWd95/H3p04CasmCwUMXxTYTtG5FoJDQkaHKagkqGBO2cauyW1ulBBTWEiXt9ocqmVZKqkSV0qLdSqhpg2mtlNWS0NLSThtDcPnRtKVmPYE0kNAU46ZkZCRPcZqWhpI6fPePeyJdxjOeMzN35mb8vF/Slc95nuec+31s6zNnzj33nFQVkqR2fMe4C5AkrS+DX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktSYC8ZdwEK2bNlSk5OT4y5DkjaMe++99x+raqLP2Kdl8E9OTjIzMzPuMiRpw0jyD33HeqpHkhpj8EtSY5YM/iTbknwyyReTPJDkfy4wJknek+R4kvuTvGKo79okX+pe1456ApKk5elzjv8M8PNV9dkkFwP3JjlSVQ8OjXkDsKN7vRL4LeCVSZ4L3AhMAdVtO11Vj450FpKk3pY84q+qr1bVZ7vlfwG+CFwyb9ge4P01cBR4TpIXAK8HjlTV6S7sjwC7RzoDSdKyLOscf5JJ4ArgM/O6LgEeGVqf7doWa19o3/uTzCSZmZubW05ZkqRl6B38SZ4F/AHwM1X1z/O7F9ikztF+dmPVwaqaqqqpiYlel6JKklagV/AnuZBB6P/fqvrDBYbMAtuG1rcCJ8/RLkkakz5X9QT4HeCLVfW/Fxk2Dbylu7rnVcBjVfVV4G5gV5LNSTYDu7o2SdKY9Lmq50rgJ4DPJ7mva/tFYDtAVd0GHAauBo4DjwNv6/pOJ7kZONZtd1NVnR5d+WebPHDXWu5+UQ/f8saxvK8kLdeSwV9Vf8nC5+qHxxTwzkX6DgGHVlSdJGnk/OauJDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaszT8glckjRO5/v3gTzil6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktSYJW/ZkOQQ8F+BU1X10gX6fwH48aH9vRiY6B67+DDwL8CTwJmqmhpV4ZKklelzxH87sHuxzqp6d1VdXlWXA+8C/nzec3Vf0/Ub+pL0NLBk8FfVPUDfB6TvA+5YVUWSpDU1snP8Sb6TwW8GfzDUXMDHktybZP+o3kuStHKjvC3zDwF/Ne80z5VVdTLJ84EjSf62+w3iLN0Phv0A27dvH2FZkqRho7yqZy/zTvNU1cnuz1PAh4Gdi21cVQeraqqqpiYmJkZYliRp2EiCP8mzgVcDfzzU9l1JLn5qGdgFfGEU7ydJWrk+l3PeAVwFbEkyC9wIXAhQVbd1w34E+FhV/evQpt8NfDjJU+/zgar66OhKlyStxJLBX1X7eoy5ncFln8NtJ4CXr7QwSdLa8Ju7ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGjPJh65I0MpMH7hp3CeetJY/4kxxKcirJgs/LTXJVkseS3Ne9bhjq253koSTHkxwYZeGSpJXpc6rndmD3EmP+oqou7143ASTZBNwKvAG4DNiX5LLVFCtJWr0lg7+q7gFOr2DfO4HjVXWiqp4A7gT2rGA/kqQRGtWHuz+Q5G+SfCTJS7q2S4BHhsbMdm2SpDEaxYe7nwVeWFVfT3I18EfADiALjK3FdpJkP7AfYPv27SMoa32N84Ooh29549jeW9LGs+oj/qr656r6erd8GLgwyRYGR/jbhoZuBU6eYz8Hq2qqqqYmJiZWW5YkaRGrDv4k/zFJuuWd3T6/BhwDdiS5NMlF
wF5gerXvJ0lanSVP9SS5A7gK2JJkFrgRuBCgqm4D3gS8I8kZ4BvA3qoq4EyS64G7gU3Aoap6YE1mIUnqbcngr6p9S/T/BvAbi/QdBg6vrDRJ0lrwlg2S1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhqzZPAnOZTkVJIvLNL/40nu716fTvLyob6Hk3w+yX1JZkZZuCRpZfoc8d8O7D5H/98Dr66qlwE3Awfn9b+mqi6vqqmVlShJGqU+D1u/J8nkOfo/PbR6FNi6+rIkSWtl1Of4rwM+MrRewMeS3Jtk/7k2TLI/yUySmbm5uRGXJUl6ypJH/H0leQ2D4P/PQ81XVtXJJM8HjiT526q6Z6Htq+og3WmiqampGlVdkqRvN5Ij/iQvA34b2FNVX3uqvapOdn+eAj4M7BzF+0mSVm7VwZ9kO/CHwE9U1d8NtX9XkoufWgZ2AQteGSRJWj9LnupJcgdwFbAlySxwI3AhQFXdBtwAPA/4zSQAZ7oreL4b+HDXdgHwgar66BrMQZK0DH2u6tm3RP/bgbcv0H4CePnZW0iSxslv7kpSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjegV/kkNJTiVZ8Jm5GXhPkuNJ7k/yiqG+a5N8qXtdO6rCJUkr0/eI/3Zg9zn63wDs6F77gd8CSPJcBs/ofSWwE7gxyeaVFitJWr1ewV9V9wCnzzFkD/D+GjgKPCfJC4DXA0eq6nRVPQoc4dw/QCRJa2xU5/gvAR4ZWp/t2hZrlySNyaiCPwu01Tnaz95Bsj/JTJKZubm5EZUlSZpvVME/C2wbWt8KnDxH+1mq6mBVTVXV1MTExIjKkiTNN6rgnwbe0l3d8yrgsar6KnA3sCvJ5u5D3V1dmyRpTC7oMyjJHcBVwJYkswyu1LkQoKpuAw4DVwPHgceBt3V9p5PcDBzrdnVTVZ3rQ2JJ0hrrFfxVtW+J/gLeuUjfIeDQ8kuTJK0Fv7krSY0x+CWpMQa/JDWm1zl+Se2aPHDXuEvQiHnEL0mNMfglqTGe6jkPjOtX8YdveeNY3lfS6njEL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxvYI/ye4kDyU5nuTAAv2/nuS+7vV3Sf5pqO/Job7pURYvSVq+Je/Vk2QTcCvwOmAWOJZkuqoefGpMVf3s0PifAq4Y2sU3qury0ZUsSVqNPkf8O4HjVXWiqp4A7gT2nGP8PuCOURQnSRq9PsF/CfDI0Pps13aWJC8ELgU+MdT8zCQzSY4m+eEVVypJGok+t2XOAm21yNi9wIeq6smhtu1VdTLJi4BPJPl8VX35rDdJ9gP7AbZv396jLEnSSvQ54p8Ftg2tbwVOLjJ2L/NO81TVye7PE8Cn+Pbz/8PjDlbVVFVNTUxM9ChLkrQSfYL/GLAjyaVJLmIQ7mddnZPke4HNwF8PtW1O8oxueQtwJfDg/G0lSetnyVM9VXUmyfXA3cAm4FBVPZDkJmCmqp76IbAPuLOqhk8DvRh4b5JvMfghc8vw1UCSpPXX69GLVXUYODyv7YZ567+8wHafBr5vFfVJkkbMb+5KUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWpMr+BPsjvJQ0mOJzmwQP9bk8wlua97vX2o79okX+pe146yeEnS8i35zN0km4BbgdcBs8CxJNMLPDT9g1V1/bxtnwvcCEwBBdzbbfvoSKqXJC1bnyP+ncDxqjpRVU8AdwJ7eu7/9cCRqjrdhf0RYPfKSpUkjUKf4L8E
eGRofbZrm+9Hk9yf5ENJti1zW5LsTzKTZGZubq5HWZKklegT/Fmgreat/wkwWVUvA/4M+N1lbDtorDpYVVNVNTUxMdGjLEnSSvQJ/llg29D6VuDk8ICq+lpVfbNbfR/w/X23lSStrz7BfwzYkeTSJBcBe4Hp4QFJXjC0eg3wxW75bmBXks1JNgO7ujZJ0pgseVVPVZ1Jcj2DwN4EHKqqB5LcBMxU1TTw00muAc4Ap4G3dtueTnIzgx8eADdV1ek1mIckqaclgx+gqg4Dh+e13TC0/C7gXYtsewg4tIoaJUkj1Cv4pYVMHrhrbO/98C1vHNt7Sxudt2yQpMZ4xC9tAOP87UrnH4/4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjegV/kt1JHkpyPMmBBfp/LsmDSe5P8vEkLxzqezLJfd1rev62kqT1teT9+JNsAm4FXgfMAseSTFfVg0PDPgdMVdXjSd4B/BrwY13fN6rq8hHXLUlaoT5H/DuB41V1oqqeAO4E9gwPqKpPVtXj3epRYOtoy5QkjUqf4L8EeGRofbZrW8x1wEeG1p+ZZCbJ0SQ/vNhGSfZ342bm5uZ6lCVJWok+j17MAm214MDkzcAU8Oqh5u1VdTLJi4BPJPl8VX35rB1WHQQOAkxNTS24f0nS6vU54p8Ftg2tbwVOzh+U5LXALwHXVNU3n2qvqpPdnyeATwFXrKJeSdIq9Qn+Y8COJJcmuQjYC3zb1TlJrgDeyyD0Tw21b07yjG55C3AlMPyhsCRpnS15qqeqziS5Hrgb2AQcqqoHktwEzFTVNPBu4FnA7ycB+EpVXQO8GHhvkm8x+CFzy7yrgSRJ66zPOX6q6jBweF7bDUPLr11ku08D37eaAqWnk8kDd427BGnVegW/9HRjAEsr5y0bJKkxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mN6RX8SXYneSjJ8SQHFuh/RpIPdv2fSTI51Peurv2hJK8fXemSpJVYMviTbAJuBd4AXAbsS3LZvGHXAY9W1X8Cfh341W7byxg8nP0lwG7gN7v9SZLGpM8R/07geFWdqKongDuBPfPG7AF+t1v+EPCDGTx1fQ9wZ1V9s6r+Hjje7U+SNCZ9gv8S4JGh9dmubcExVXUGeAx4Xs9tJUnrqM/D1rNAW/Uc02fbwQ6S/cD+bvXrSR7qUdtCtgD/uMJtNyrnfP5rbb7Q4Jzzq6ua8wv7DuwT/LPAtqH1rcDJRcbMJrkAeDZwuue2AFTVQeBgv7IXl2SmqqZWu5+NxDmf/1qbLzjntdTnVM8xYEeSS5NcxODD2ul5Y6aBa7vlNwGfqKrq2vd2V/1cCuwA/t9oSpckrcSSR/xVdSbJ9cDdwCbgUFU9kOQmYKaqpoHfAf5PkuMMjvT3dts+kOT3gAeBM8A7q+rJNZqLJKmHPqd6qKrDwOF5bTcMLf8b8N8W2fZXgF9ZRY3LterTRRuQcz7/tTZfcM5rJoMzMpKkVnjLBklqzIYN/tXcRmIj6jHfn0vyYJL7k3w8Se9Lu56ulprz0Lg3JakkG/4KkD5zTvLfu3/rB5J8YL1rHLUe/7e3J/lkks91/7+vHkedo5LkUJJTSb6wSH+SvKf7+7g/yStGXkRVbbgXgw+Zvwy8CLgI+BvgsnljfhK4rVveC3xw3HWv8XxfA3xnt/yOjTzfvnPuxl0M3AMcBabGXfc6/DvvAD4HbO7Wnz/uutdhzgeBd3TLlwEPj7vuVc75vwCvAL6wSP/VwEcYfA/qVcBnRl3DRj3iX81tJDaiJedbVZ+sqse71aMMvjOxkfX5Nwa4Gfg14N/Ws7g10mfO/wO4taoeBaiqU+tc46j1mXMB/6FbfjaLfBdoo6iqexhc/biYPcD7a+Ao8JwkLxhl
DRs1+FdzG4mNaLm3vriOwRHDRrbknJNcAWyrqj9dz8LWUJ9/5+8BvifJXyU5mmT3ulW3NvrM+ZeBNyeZZXB14U+tT2ljs+a3uul1OefT0GpuI7ERLefWF28GpoBXr2lFa++cc07yHQzuBPvW9SpoHfT5d76Awemeqxj8VvcXSV5aVf+0xrWtlT5z3gfcXlX/K8kPMPjO0Eur6ltrX95YrHl2bdQj/uXcRoJ5t5HYiHrd+iLJa4FfAq6pqm+uU21rZak5Xwy8FPhUkocZnAud3uAf8Pb9f/3HVfXvNbjj7UMMfhBsVH3mfB3wewBV9dfAMxncx+d81ftWNyu1UYN/NbeR2IiWnG932uO9DEJ/o5/3hSXmXFWPVdWWqpqsqkkGn2tcU1Uz4yl3JPr8v/4jBh/kk2QLg1M/J9a1ytHqM+evAD8IkOTFDIJ/bl2rXF/TwFu6q3teBTxWVV8d5RtsyFM9tYrbSGxEPef7buBZwO93n2F/paquGVvRq9RzzueVnnO+G9iV5EHgSeAXqupr46t6dXrO+eeB9yX5WQanPN66gQ/iSHIHg1N1W7rPLW4ELgSoqtsYfI5xNYPnlzwOvG3kNWzgvz9J0gps1FM9kqQVMvglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWrM/weW0s/QV9+kXgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import scipy.stats as stats\n",
    "\n",
    "# Two normal distributions (sigma = 0.2) truncated to [0, 1]: one centred on 0, one centred on 1.\n",
    "lower, upper = 0, 1\n",
    "mu, sigma = 0, 0.2\n",
    "# truncnorm expects its clipping bounds in units of `scale`, measured from `loc`.\n",
    "left_truncnorm = stats.truncnorm(\n",
    "    (lower - mu) / sigma, (upper - mu) / sigma, loc=mu, scale=sigma)\n",
    "right_truncnorm = stats.truncnorm(\n",
    "    (lower - 1.) / sigma, (upper - 1.) / sigma, loc=1., scale=sigma)\n",
    "\n",
    "# Sanity plot: an equal mix of both distributions should pile up near 0 and near 1.\n",
    "fig, ax = plt.subplots(1, sharex=True)\n",
    "# density=True replaces the normed=True keyword, which newer Matplotlib releases no longer accept.\n",
    "ax.hist(np.concatenate([left_truncnorm.rvs(10000),right_truncnorm.rvs(10000)]), density=True)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train(args):\n",
    "    SEED = 2599\n",
    "    np.random.seed(SEED)\n",
    "    tf.set_random_seed(SEED)\n",
    "    random.seed(SEED)\n",
    "    start_learning_rate, decay_step, decay_rate, actor_lr, critic_lr = args\n",
    "#     actor_lr, critic_lr = args\n",
    "    # actor_lr, critic_lr = 0.0001, 0.0005\n",
    "\n",
    "    #     Training Parameters\n",
    "#     start_learning_rate = 0.0005\n",
    "#     decay_step = 500\n",
    "#     decay_rate = 1.\n",
    "    num_hidden = 1024 # hidden layer num of features\n",
    "\n",
    "    #     actor_lr = 0.0001\n",
    "    #     critic_lr = 0.001\n",
    "    rl_reward_thres_for_decay = 5\n",
    "\n",
    "    session_config = tf.ConfigProto(log_device_placement=False)\n",
    "    session_config.gpu_options.allow_growth = True\n",
    "\n",
    "    training_steps = 2000\n",
    "    batch_size = 128\n",
    "\n",
    "    # Network Parameters\n",
    "    num_input = 15 # MIMIC data input (15 features)\n",
    "    timesteps = max_seq_len # timesteps\n",
    "    num_classes = 2 # MNIST total classes (0-9 digits)\n",
    "\n",
    "    display_step = 10\n",
    "\n",
    "    gpu = 0\n",
    "\n",
    "    graph = tf.Graph()\n",
    "\n",
    "    file_appendix = \"Contrastive_MIMIC_LSTMRL_MaskGradients_\" + str(start_learning_rate) + \"_\" + str(decay_step) + \"_\" + str(decay_rate) + \"_\" + str(num_hidden) + \"_\" + str(actor_lr) + \"_\" + str(critic_lr)\n",
    "\n",
    "\n",
    "    def build_net(x, is_training=True, reuse=tf.AUTO_REUSE, graph=graph):\n",
    "\n",
    "            with graph.as_default():\n",
    "                with tf.variable_scope(\"lstm\", reuse=reuse) as scope:\n",
    "                    seq_len = seq_length(x)\n",
    "                    enumerated_last_idxs = tf.cast(tf.stack([seq_len-1, tf.range(tf.shape(seq_len)[0])], axis=1), tf.int32)\n",
    "                    x = tf.unstack(x, timesteps, 1)\n",
    "                    lstm_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0, reuse=reuse)\n",
    "                    outputs, state, all_states = my_static_rnn(lstm_cell, x, dtype=tf.float32)\n",
    "                    last_outputs = tf.gather_nd(outputs, enumerated_last_idxs)\n",
    "            #         outputs = get_outputs(outputs, enumerated_seq_len)\n",
    "            #             with tf.variable_scope(\"NN\", reuse=tf.AUTO_REUSE) as scope:\n",
    "                    with slim.arg_scope([slim.fully_connected], \n",
    "                                            activation_fn=tf.nn.relu,\n",
    "                                            weights_initializer=tf.random_uniform_initializer(0.001, 0.01),\n",
    "                                            weights_regularizer=slim.l2_regularizer(0.005),\n",
    "                                            biases_regularizer=slim.l2_regularizer(0.005),\n",
    "                                            normalizer_fn = slim.batch_norm,\n",
    "    #                                             normalizer_fn = None,\n",
    "                                            normalizer_params = {\"is_training\": is_training},\n",
    "                                            reuse = reuse,\n",
    "                                            scope = scope):\n",
    "\n",
    "            #             fc1 = slim.fully_connected(x, weights[0], scope='fc1')\n",
    "            #             fc2 = slim.fully_connected(fc1, weights[1], scope='fc2')\n",
    "                        logits = slim.fully_connected(last_outputs,num_classes,activation_fn=None, weights_regularizer=None, normalizer_fn=None, scope='logits')\n",
    "                        pred = slim.softmax(logits, scope='pred')\n",
    "\n",
    "                        return logits, pred, outputs, x, all_states, seq_len, last_outputs\n",
    "\n",
    "\n",
    "    with graph.as_default():\n",
    "\n",
    "        dataset_train = tf.data.Dataset.from_generator(gen_train, (tf.float32, tf.float32, tf.int32), ([batch_size, timesteps, 15],[batch_size, 2],[batch_size, timesteps, 15])).repeat(5).shuffle(10)\n",
    "        input_train, label_train, mask_train = dataset_train.make_one_shot_iterator().get_next()\n",
    "\n",
    "        dataset_test = tf.data.Dataset.from_generator(gen_test, (tf.float32, tf.float32, tf.int32), ([ timesteps, 15],[ 2],[timesteps, 15])).repeat(10000).batch(len(df_shock_test.index.unique())+len(df_non_shock_test.index.unique()))\n",
    "        input_test, label_test, mask_test = dataset_test.make_one_shot_iterator().get_next()\n",
    "\n",
    "        input_train_holder = tf.placeholder(shape=[batch_size, timesteps, num_input], dtype=tf.float32)\n",
    "        label_train_holder = tf.placeholder(shape=[batch_size, 2], dtype=tf.float32)\n",
    "        mask_train_holder = tf.placeholder(shape=[batch_size, timesteps, num_input], dtype=tf.int32)\n",
    "\n",
    "        logits, prediction, outs, xs, states, seq_lens, last_outputs = build_net(input_train_holder)\n",
    "        loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=label_train_holder) + tf.reduce_mean(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES, scope=\"lstm\")), axis=0)\n",
    "    #         loss_op = tf.divide(tf.multiply(loss_op, max_seq_len), tf.reshape(seq_len, [-1, 1]))\n",
    "        learning_rate = tf.train.exponential_decay(start_learning_rate, tf.train.get_or_create_global_step(), decay_steps=decay_step, decay_rate=decay_rate)\n",
    "        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "\n",
    "        missing_idxs = tf.where_v2(mask_train)\n",
    "        missing_idxs = tf.stack([missing_idxs[:,1], missing_idxs[:,0], missing_idxs[:,2]], axis=-1)\n",
    "\n",
    "        # tensor names for <i,j,f,o> -- rnn/basic_lstm_cell/split_{}:<0,1,2,3>\n",
    "\n",
    "        i_gates = [graph.get_tensor_by_name(\"lstm/rnn/basic_lstm_cell/split_\"+str(t)+\":0\") if t>0 else graph.get_tensor_by_name(\"lstm/rnn/basic_lstm_cell/split:0\") for t in range(timesteps)]\n",
    "        j_gates = [graph.get_tensor_by_name(\"lstm/rnn/basic_lstm_cell/split_\"+str(t)+\":1\") if t>0 else graph.get_tensor_by_name(\"lstm/rnn/basic_lstm_cell/split:1\") for t in range(timesteps)]\n",
    "        f_gates = [graph.get_tensor_by_name(\"lstm/rnn/basic_lstm_cell/split_\"+str(t)+\":2\") if t>0 else graph.get_tensor_by_name(\"lstm/rnn/basic_lstm_cell/split:2\") for t in range(timesteps)]\n",
    "        o_gates = [graph.get_tensor_by_name(\"lstm/rnn/basic_lstm_cell/split_\"+str(t)+\":3\") if t>0 else graph.get_tensor_by_name(\"lstm/rnn/basic_lstm_cell/split:3\") for t in range(timesteps)]\n",
    "\n",
    "        grads_i = optimizer.compute_gradients(loss_op,i_gates)\n",
    "        grads_i = [g[0] for g in grads_i]\n",
    "        grads_j = optimizer.compute_gradients(loss_op,j_gates)\n",
    "        grads_j = [g[0] for g in grads_j]\n",
    "        grads_f = optimizer.compute_gradients(loss_op,f_gates)\n",
    "        grads_f = [g[0] for g in grads_f]\n",
    "        grads_o = optimizer.compute_gradients(loss_op,o_gates)\n",
    "        grads_o = [g[0] for g in grads_o]\n",
    "\n",
    "        grads_i_j_f_o = [tf.concat([grads_i[t], grads_j[t], grads_f[t], grads_o[t]], axis=1) for t in range(timesteps)]\n",
    "\n",
    "    #     xs_need_to_be_zero = tf.gather_nd(xs,missing_idxs)\n",
    "    #     xs_updates = tf.scatter_nd(indices=missing_idxs, updates=-xs_need_to_be_zero, shape=[timesteps, batch_size, num_input])\n",
    "    #     xs_for_grads = xs + xs_updates\n",
    "        grad_attention = tf.placeholder(shape=[timesteps, batch_size, num_input], dtype=tf.float32)\n",
    "        xs_for_grads = tf.multiply(xs, grad_attention)\n",
    "        W_grads = tf.tensordot(xs_for_grads, grads_i_j_f_o, axes=[[0,1],[0,1]])/batch_size\n",
    "\n",
    "        enumerated_seq_lens = tf.cast(tf.stack([seq_lens, tf.range(tf.shape(seq_lens)[0])], axis=1), tf.int32)\n",
    "\n",
    "        def cond(i, e, o):\n",
    "            return i < batch_size\n",
    "        def body(i, e, o):\n",
    "            o = tf.concat([o,tf.stack([tf.range(e[i,0]),tf.repeat(e[i,1],e[i,0])],axis=-1)],axis=0)\n",
    "            return i+1, e, o\n",
    "\n",
    "        _,_,nonzero_out_idxs = tf.while_loop(cond,body,[tf.constant(1, dtype=tf.int32), enumerated_seq_lens, tf.stack([tf.range(enumerated_seq_lens[0,0]),tf.repeat(enumerated_seq_lens[0,1],enumerated_seq_lens[0,0])],axis=-1)], shape_invariants=[tf.TensorShape([]),tf.TensorShape([None,2]),tf.TensorShape([None,2])])\n",
    "\n",
    "        outs_non_zero = tf.gather_nd(outs,nonzero_out_idxs)\n",
    "        outs_updates = tf.scatter_nd(indices=nonzero_out_idxs, updates=outs_non_zero, shape=[timesteps, batch_size, num_hidden])\n",
    "        outs = tf.zeros((timesteps,batch_size,num_hidden)) + outs_updates\n",
    "        U_grads = tf.tensordot(outs, grads_i_j_f_o, axes=[[0,1],[0,1]])/batch_size\n",
    "        lstm_kernel_grads = tf.concat([W_grads,U_grads],axis=0)     \n",
    "\n",
    "        logits_final, pred_final, _, _, _, _, _ = build_net(input_test, is_training=False)\n",
    "\n",
    "\n",
    "        grads = optimizer.compute_gradients(loss_op, [v for v in tf.trainable_variables() if v.name.find(\"lstm\")!=-1])\n",
    "        grads = [g[0] for g in grads]\n",
    "\n",
    "        grads[0] = lstm_kernel_grads\n",
    "\n",
    "\n",
    "        grads_update_op = optimizer.apply_gradients(zip(grads, [v for v in tf.trainable_variables() if v.name.find(\"lstm\")!=-1]))\n",
    "\n",
    "        train_correct_pred = tf.equal(tf.cast(tf.argmax(prediction, 1),tf.float32), tf.cast(tf.argmax(label_train_holder, 1),tf.float32) )\n",
    "        train_accuracy = tf.reduce_mean(tf.cast(train_correct_pred, tf.float32))\n",
    "        train_kld = tf.keras.losses.KLDivergence()(prediction, label_train_holder)\n",
    "\n",
    "        final_correct_pred = tf.equal(tf.cast(tf.argmax(pred_final, 1), tf.float32), tf.cast(tf.argmax(label_test, 1),tf.float32))\n",
    "        final_accuracy = tf.reduce_mean(tf.cast(final_correct_pred, tf.float32))\n",
    "        final_kld = tf.keras.losses.KLDivergence()(pred_final, label_test)\n",
    "\n",
    "        final_score = pred_final[:,1]\n",
    "\n",
    "        max_final_acc = tf.Variable(0, dtype=tf.float32, name=\"max_final_acc\", trainable=False)\n",
    "        assign_max_final_acc = max_final_acc.assign(final_accuracy)\n",
    "\n",
    "    with graph.as_default():\n",
    "        actor = Actor(graph=graph, state_dim=num_input*2+num_hidden*2, action_dim=num_input, learning_rate=actor_lr, tau=0.001, batch_size=batch_size, save_path=\"./saved_model/\"+file_appendix+\"/actor.ckpt\")\n",
    "        critic = Critic(graph=graph, state_dim=num_input*2+num_hidden*2, action_dim=num_input, learning_rate=critic_lr, tau=0.001, gamma=0.99, save_path=\"./saved_model/\"+file_appendix+\"/critic.ckpt\")\n",
    "        init = tf.global_variables_initializer()\n",
    "        saver = tf.train.Saver()\n",
    "\n",
    "    # Start training\n",
    "    with tf.Session(config=session_config, graph=graph) as sess:\n",
    "        \n",
    "        if os.path.exists(os.path.join(\"saved_model\",file_appendix)):\n",
    "            actor.saver.restore(sess, os.path.join(\"saved_model\",file_appendix,\"actor.ckpt\"))\n",
    "            critic.saver.restore(sess, os.path.join(\"saved_model\",file_appendix,\"critic.ckpt\"))\n",
    "            saver.restore(sess, os.path.join(\"saved_model\",file_appendix,\"best.ckpt\"))\n",
    "            log = pd.read_csv(os.path.join(\"stats\",\"rl_log\",file_appendix+\".txt\"), delimiter=\",\", header=None)\n",
    "            steps_run = int(log.iloc[-1][0].split(\" \")[1])\n",
    "            EXPLORATION_RATE = 0.6*(0.95**(steps_run))\n",
    "            GUIDE_RATE = .15*(0.95**(steps_run))\n",
    "#             if GUIDE_RATE < .8:\n",
    "#                 GUIDE_RATE = .8\n",
    "            start_learning_rate = start_learning_rate*(decay_rate**float(steps_run/decay_step))\n",
    "        else:\n",
    "            steps_run = 0\n",
    "            sess.run(init)\n",
    "            EXPLORATION_RATE = 0.6\n",
    "            GUIDE_RATE = .15\n",
    "#         sess.run(init)\n",
    "#         EXPLORATION_RATE = 0.6\n",
    "#         GUIDE_RATE = 0.15\n",
    "        ep_reward = 0\n",
    "        ep_ave_max_q = 0\n",
    "\n",
    "        data_in, label_in, s_mask = sess.run([input_train, label_train, mask_train])\n",
    "\n",
    "\n",
    "        s_1, s_2 = sess.run([states, outs], feed_dict = {input_train_holder:data_in, label_train_holder:label_in, mask_train_holder:s_mask})\n",
    "        s = np.concatenate([np.asarray(np.split(data_in,timesteps,axis=1)).reshape(timesteps,batch_size,num_input),\n",
    "                       np.asarray(np.split(s_mask,timesteps,axis=1)).reshape(timesteps,batch_size,num_input)\n",
    "                            ,s_1,s_2], axis=-1)\n",
    "\n",
    "\n",
    "        reward_list = []\n",
    "        ave_max_q_list = []\n",
    "        replay_buffer = ReplayBuffer(10**4, random_seed=SEED)\n",
    "\n",
    "        # Run the initializer\n",
    "\n",
    "\n",
    "        max_auc = 0.\n",
    "        max_ap = 0.\n",
    "\n",
    "        actor.update_target_network(sess)\n",
    "        critic.update_target_network(sess)\n",
    "\n",
    "        for step in range(steps_run, training_steps):\n",
    "            \n",
    "            rand_num = np.random.rand(1)\n",
    "\n",
    "            if rand_num <= EXPLORATION_RATE:\n",
    "                a = np.concatenate([left_truncnorm.rvs(timesteps*batch_size*num_input/2),right_truncnorm.rvs(timesteps*batch_size*num_input/2)])\n",
    "                np.random.shuffle(a)\n",
    "                a = a.reshape(timesteps, batch_size, num_input).astype(np.float32)\n",
    "\n",
    "            elif rand_num <= GUIDE_RATE+EXPLORATION_RATE and rand_num > EXPLORATION_RATE:\n",
    "                a = np.asarray(np.split((1-s_mask).astype(np.float32), timesteps, axis=1)).reshape(timesteps,batch_size,num_input)\n",
    "\n",
    "            else:\n",
    "                a = actor.predict(s.reshape(-1,num_input*2+num_hidden*2), sess)\n",
    "                a = a.reshape(timesteps, batch_size, num_input)\n",
    "    #                 print \"a: \", a\n",
    "    #                 print \"mask: \", s_mask\n",
    "            last_outs, _, kld = sess.run([last_outputs, grads_update_op, train_kld], feed_dict={grad_attention:a, input_train_holder:data_in, label_train_holder:label_in, mask_train_holder:s_mask})\n",
    "            acc, score = sess.run([final_accuracy, final_score])\n",
    "            data_in, label_in, s2_mask = sess.run([input_train, label_train, mask_train])\n",
    "            s2_1, s2_2 = sess.run([states, outs], feed_dict = {input_train_holder:data_in, label_train_holder:label_in})\n",
    "            s2 = np.concatenate([np.asarray(np.split(data_in,timesteps,axis=1)).reshape(timesteps,batch_size,num_input),\n",
    "                       np.asarray(np.split(s_mask,timesteps,axis=1)).reshape(timesteps,batch_size,num_input)\n",
    "                            ,s2_1,s2_2], axis=-1)\n",
    "    #         auc = roc_auc_score(sess.run(label_test), score)\n",
    "    #         ap = average_precision_score(sess.run(label_test), score)\n",
    "    #         r = np.repeat(acc + auc + ap - 3., batch_size)\n",
    "            r = np.repeat(-kld, batch_size)\n",
    "            r_mse = mean_squared_error(last_outs[:batch_size/4, :], last_outs[batch_size/2:batch_size*3/4, :]) + \\\n",
    "                    mean_squared_error(last_outs[batch_size/4:batch_size/2, :], last_outs[batch_size*3/4:, :]) - \\\n",
    "                    mean_squared_error(last_outs[:batch_size/4, :],last_outs[batch_size/4:batch_size/2, :]) - \\\n",
    "                    mean_squared_error(last_outs[batch_size/2:batch_size*3/4, :],last_outs[batch_size*3/4:, :])\n",
    "            r = r + 5*r_mse\n",
    "    #         print \"KLD \", kld, \"MSE\", r_mse\n",
    "            replay_buffer.add_batch([list(i) for i in zip(s.reshape(-1,num_input*2+num_hidden*2),a.reshape(-1,num_input),r,s2.reshape(-1,num_input*2+num_hidden*2))])\n",
    "\n",
    "            if replay_buffer.size() > batch_size:\n",
    "                s_batch, a_batch, r_batch, s2_batch = replay_buffer.sample_batch(batch_size)\n",
    "\n",
    "                # Calculate targets\n",
    "                target_q = critic.predict_target(\n",
    "                    s2_batch, actor.predict_target(s2_batch, sess), sess)\n",
    "\n",
    "                y_i = []\n",
    "                for k in range(batch_size):\n",
    "                    y_i.append(r_batch[k] + critic.gamma * target_q[k])\n",
    "\n",
    "                # Update the critic given the targets\n",
    "                predicted_q_value, _ = critic.train(\n",
    "                    s_batch, a_batch, np.reshape(y_i, (batch_size, 1)), step, sess)\n",
    "\n",
    "                ave_max_q = np.amax(predicted_q_value)\n",
    "                ave_max_q_list += [ave_max_q]\n",
    "\n",
    "                # Update the actor policy using the sampled gradient\n",
    "                a_outs = actor.predict(s_batch, sess)\n",
    "                grads = critic.action_gradients(s_batch, a_outs, sess)\n",
    "                actor.train(s_batch, grads[0], step, sess)\n",
    "\n",
    "                # Update target networks\n",
    "                actor.update_target_network(sess)\n",
    "                critic.update_target_network(sess)\n",
    "\n",
    "            s = s2\n",
    "            s_mask = s2_mask\n",
    "\n",
    "            reward_list += [r[0]]\n",
    "\n",
    "            EXPLORATION_RATE = EXPLORATION_RATE * 0.95\n",
    "            GUIDE_RATE = GUIDE_RATE * 0.95\n",
    "\n",
    "\n",
    "            if step % display_step == 0 and step > 0:\n",
    "                # Calculate batch loss and accuracy\n",
    "                loss, acc, train_acc = sess.run([loss_op, final_accuracy, train_accuracy], feed_dict = {input_train_holder:data_in, label_train_holder:label_in})\n",
    "                auc = roc_auc_score(np.argmax(sess.run(label_test), axis=1), final_score.eval())\n",
    "                ap = average_precision_score(np.argmax(sess.run(label_test), axis=1), final_score.eval())\n",
    "                if np.mean(reward_list[-display_step:]) >= rl_reward_thres_for_decay:\n",
    "                    actor.decay_learning_rate(0.965, sess)\n",
    "                    critic.decay_learning_rate(0.965, sess)\n",
    "                if acc > max_final_acc.eval():\n",
    "                    max_auc = auc\n",
    "                    max_ap = ap\n",
    "                    sess.run(assign_max_final_acc)\n",
    "                    saver.save(sess, \"./saved_model/\"+file_appendix+\"/best.ckpt\")\n",
    "                print \"Step \" + str(step) + \", Reward=\" + str(np.sum(reward_list[-display_step:])) + \", Minibatch Loss= \" + \\\n",
    "                      \"{:.4f}\".format(loss) + \", Training Accuracy= \" + \\\n",
    "                      \"{:.3f}\".format(train_acc) + \", Testing Acc= \" + \"{:3f}\".format(final_accuracy.eval()) + \\\n",
    "                      \", Max Final Accuracy= \", \"{:6f}\".format(max_final_acc.eval()) + \\\n",
    "                      \", Max AUC= \", \"{:6f}\".format(max_auc) + \\\n",
    "                      \", Max AP= \", \"{:6f}\".format(max_ap) + \\\n",
    "                      \", Max Q= \", \"{:6f}\".format(np.mean(ave_max_q_list[-display_step:]))\n",
    "                with open(\"./stats/rl_log/\" + file_appendix + \".txt\", \"ab\") as myfile:\n",
    "                    myfile.write(\"Step \" + str(step) + \", Reward=\" + str(np.sum(reward_list[-display_step:])) + \", Minibatch Loss= \" + \"{:.4f}\".format(loss) + \", Training Accuracy= \" + \"{:.3f}\".format(train_acc) + \", Testing Acc= \" + \"{:3f}\".format(final_accuracy.eval()) + \", Max Final Accuracy= \" + \"{:6f}\".format(max_final_acc.eval()) + \", Max AUC= \" + \"{:6f}\".format(max_auc) + \", Max AP= \" + \"{:6f}\".format(max_ap) + \"\\n\")\n",
    "\n",
    "        print \"Optimization Finished!\"\n",
    "\n",
    "        print \"Testing Accuracy:\", sess.run(max_final_acc)\n",
    "        with open(\"./stats/Contrastive_MIMIC_LSTMRL_maskGradients.txt\", \"ab\") as myfile:\n",
    "            myfile.write(\"%.6f\\t%i\\t%.3f\\t%.6f\\t%.6f\\t%i\\t%.6f\\t%.6f\\t%.6f\\n\" %(start_learning_rate, decay_step, decay_rate, actor_lr, critic_lr, num_hidden, max_final_acc.eval(), max_auc, max_ap))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "W0522 19:59:40.708055 139871436928832 deprecation.py:323] From <ipython-input-33-afb6f246fc10>:74: make_one_shot_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.\n",
      "W0522 19:59:40.783165 139871436928832 deprecation.py:323] From <ipython-input-33-afb6f246fc10>:47: __init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.\n",
      "W0522 19:59:40.801947 139871436928832 deprecation.py:323] From /home/gaoqitong/anaconda2/lib/python2.7/site-packages/tensorflow_core/python/ops/rnn_cell_impl.py:735: add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `layer.add_weight` method instead.\n",
      "W0522 19:59:40.813689 139871436928832 deprecation.py:506] From /home/gaoqitong/anaconda2/lib/python2.7/site-packages/tensorflow_core/python/ops/rnn_cell_impl.py:739: calling __init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Call initializer instance with the dtype argument instead of passing it to the constructor\n",
      "W0522 19:59:40.717907 139871436928832 deprecation.py:323] From <ipython-input-33-afb6f246fc10>:74: make_one_shot_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.\n",
      "W0522 19:59:41.066144 139871436928832 deprecation.py:323] From <ipython-input-33-afb6f246fc10>:47: __init__ (from tensorflow.python.ops.rnn_cell_impl) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.\n",
      "W0522 19:59:41.097544 139871436928832 deprecation.py:323] From /home/gaoqitong/anaconda2/lib/python2.7/site-packages/tensorflow_core/python/ops/rnn_cell_impl.py:735: add_variable (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `layer.add_weight` method instead.\n",
      "W0522 19:59:41.114741 139871436928832 deprecation.py:506] From /home/gaoqitong/anaconda2/lib/python2.7/site-packages/tensorflow_core/python/ops/rnn_cell_impl.py:739: calling __init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Call initializer instance with the dtype argument instead of passing it to the constructor\n",
      "W0522 19:59:42.287874 139871436928832 deprecation.py:323] From /home/gaoqitong/anaconda2/lib/python2.7/site-packages/tensorflow_core/contrib/layers/python/layers/layers.py:1866: apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `layer.__call__` method instead.\n",
      "W0522 19:59:42.329375 139871436928832 deprecation.py:323] From <ipython-input-33-afb6f246fc10>:84: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "\n",
      "Future major versions of TensorFlow will allow gradients to flow\n",
      "into the labels input on backprop by default.\n",
      "\n",
      "See `tf.nn.softmax_cross_entropy_with_logits_v2`.\n",
      "\n",
      "W0522 19:59:42.558566 139871436928832 deprecation.py:323] From /home/gaoqitong/anaconda2/lib/python2.7/site-packages/tensorflow_core/contrib/layers/python/layers/layers.py:1866: apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `layer.__call__` method instead.\n",
      "W0522 19:59:42.601459 139871436928832 deprecation.py:323] From <ipython-input-33-afb6f246fc10>:84: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "\n",
      "Future major versions of TensorFlow will allow gradients to flow\n",
      "into the labels input on backprop by default.\n",
      "\n",
      "See `tf.nn.softmax_cross_entropy_with_logits_v2`.\n",
      "\n",
      "W0522 20:00:12.256536 139871436928832 deprecation.py:323] From ../qnetwork.py:37: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Deprecated in favor of operator or tf.math.divide.\n",
      "W0522 20:00:12.356026 139871436928832 module_wrapper.py:139] From ../qnetwork.py:47: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.\n",
      "\n",
      "W0522 20:00:12.764827 139871436928832 module_wrapper.py:139] From ../qnetwork.py:137: The name tf.losses.mean_squared_error is deprecated. Please use tf.compat.v1.losses.mean_squared_error instead.\n",
      "\n",
      "W0522 20:00:12.776371 139871436928832 deprecation.py:323] From /home/gaoqitong/anaconda2/lib/python2.7/site-packages/tensorflow_core/python/ops/losses/losses_impl.py:121: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use tf.where in 2.0, which has the same broadcast rule as np.where\n",
      "W0522 20:00:13.125993 139871436928832 deprecation.py:323] From ../qnetwork.py:37: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Deprecated in favor of operator or tf.math.divide.\n",
      "W0522 20:00:13.238483 139871436928832 module_wrapper.py:139] From ../qnetwork.py:47: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.\n",
      "\n",
      "W0522 20:00:13.640470 139871436928832 module_wrapper.py:139] From ../qnetwork.py:137: The name tf.losses.mean_squared_error is deprecated. Please use tf.compat.v1.losses.mean_squared_error instead.\n",
      "\n",
      "W0522 20:00:13.651025 139871436928832 deprecation.py:323] From /home/gaoqitong/anaconda2/lib/python2.7/site-packages/tensorflow_core/python/ops/losses/losses_impl.py:121: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use tf.where in 2.0, which has the same broadcast rule as np.where\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 10, Reward=-35.00559, Minibatch Loss= 0.3200, Training Accuracy= 0.875, Testing Acc= 0.866359, Max Final Accuracy=  0.866359, Max AUC=  0.902886, Max AP=  0.836851, Max Q=  -0.211569\n",
      "Step 10, Reward=-35.018295, Minibatch Loss= 0.3196, Training Accuracy= 0.875, Testing Acc= 0.864055, Max Final Accuracy=  0.864055, Max AUC=  0.902610, Max AP=  0.836618, Max Q=  -0.210007\n",
      "Step 20, Reward=-8.244965, Minibatch Loss= 0.2779, Training Accuracy= 0.891, Testing Acc= 0.873272, Max Final Accuracy=  0.873272, Max AUC=  0.937969, Max AP=  0.923509, Max Q=  -0.144750\n",
      "Step 20, Reward=-8.90247, Minibatch Loss= 0.2787, Training Accuracy= 0.891, Testing Acc= 0.873272, Max Final Accuracy=  0.873272, Max AUC=  0.937990, Max AP=  0.923810, Max Q=  -0.158837\n",
      "Step 30, Reward=-7.696951, Minibatch Loss= 0.2168, Training Accuracy= 0.922, Testing Acc= 0.868664, Max Final Accuracy=  0.873272, Max AUC=  0.937990, Max AP=  0.923810, Max Q=  -0.107345\n",
      "Step 30, Reward=-7.690688, Minibatch Loss= 0.2121, Training Accuracy= 0.922, Testing Acc= 0.875576, Max Final Accuracy=  0.875576, Max AUC=  0.945932, Max AP=  0.930634, Max Q=  -0.110216\n",
      "Step 40, Reward=-6.995404, Minibatch Loss= 0.2900, Training Accuracy= 0.891, Testing Acc= 0.884793, Max Final Accuracy=  0.884793, Max AUC=  0.940942, Max AP=  0.924048, Max Q=  -0.117980\n",
      "Step 40, Reward=-5.524526, Minibatch Loss= 0.3045, Training Accuracy= 0.875, Testing Acc= 0.880184, Max Final Accuracy=  0.880184, Max AUC=  0.933806, Max AP=  0.910184, Max Q=  -0.206336\n",
      "Step 50, Reward=-3.1554518, Minibatch Loss= 0.2902, Training Accuracy= 0.898, Testing Acc= 0.884793, Max Final Accuracy=  0.884793, Max AUC=  0.940942, Max AP=  0.924048, Max Q=  -0.281168\n",
      "Step 50, Reward=-1.8198289, Minibatch Loss= 0.2713, Training Accuracy= 0.898, Testing Acc= 0.880184, Max Final Accuracy=  0.880184, Max AUC=  0.933806, Max AP=  0.910184, Max Q=  -0.430032\n",
      "Step 60, Reward=-5.674563, Minibatch Loss= 0.3110, Training Accuracy= 0.883, Testing Acc= 0.884793, Max Final Accuracy=  0.884793, Max AUC=  0.940942, Max AP=  0.924048, Max Q=  -0.382637\n",
      "Step 60, Reward=-5.3866577, Minibatch Loss= 0.3073, Training Accuracy= 0.891, Testing Acc= 0.877880, Max Final Accuracy=  0.880184, Max AUC=  0.933806, Max AP=  0.910184, Max Q=  -0.336338\n",
      "Step 70, Reward=-1.9947414, Minibatch Loss= 0.2964, Training Accuracy= 0.852, Testing Acc= 0.887097, Max Final Accuracy=  0.887097, Max AUC=  0.952845, Max AP=  0.946156, Max Q=  -0.387921\n",
      "Step 70, Reward=-0.39318955, Minibatch Loss= 0.3051, Training Accuracy= 0.859, Testing Acc= 0.880184, Max Final Accuracy=  0.880184, Max AUC=  0.933806, Max AP=  0.910184, Max Q=  -0.162271\n",
      "Step 80, Reward=-6.764731, Minibatch Loss= 0.2205, Training Accuracy= 0.930, Testing Acc= 0.894009, Max Final Accuracy=  0.894009, Max AUC=  0.957740, Max AP=  0.951035, Max Q=  -0.287498\n",
      "Step 80, Reward=-7.9815054, Minibatch Loss= 0.2235, Training Accuracy= 0.914, Testing Acc= 0.889401, Max Final Accuracy=  0.889401, Max AUC=  0.948820, Max AP=  0.941924, Max Q=  -0.192777\n",
      "Step 780, Reward=8.792523, Minibatch Loss= 0.0964, Training Accuracy= 0.953, Testing Acc= 0.923963, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.129825\n",
      "Step 820, Reward=5.414458, Minibatch Loss= 0.1699, Training Accuracy= 0.945, Testing Acc= 0.910138, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.867358\n",
      "Step 790, Reward=9.63889, Minibatch Loss= 0.0955, Training Accuracy= 0.961, Testing Acc= 0.917051, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.294780\n",
      "Step 830, Reward=4.052806, Minibatch Loss= 0.1346, Training Accuracy= 0.938, Testing Acc= 0.914747, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.765116\n",
      "Step 800, Reward=13.170601, Minibatch Loss= 0.1165, Training Accuracy= 0.969, Testing Acc= 0.905530, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.393134\n",
      "Step 840, Reward=1.950003, Minibatch Loss= 0.1703, Training Accuracy= 0.938, Testing Acc= 0.907834, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.916119\n",
      "Step 810, Reward=7.3116565, Minibatch Loss= 0.1285, Training Accuracy= 0.961, Testing Acc= 0.919355, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.295698\n",
      "Step 850, Reward=3.4469175, Minibatch Loss= 0.2680, Training Accuracy= 0.898, Testing Acc= 0.900922, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.870529\n",
      "Step 820, Reward=11.352291, Minibatch Loss= 0.1437, Training Accuracy= 0.938, Testing Acc= 0.917051, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.453635\n",
      "Step 860, Reward=3.7558422, Minibatch Loss= 0.1246, Training Accuracy= 0.953, Testing Acc= 0.889401, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.854710\n",
      "Step 830, Reward=9.284419, Minibatch Loss= 0.0923, Training Accuracy= 0.969, Testing Acc= 0.907834, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.344828\n",
      "Step 870, Reward=3.1870341, Minibatch Loss= 0.1573, Training Accuracy= 0.938, Testing Acc= 0.903226, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.796284\n",
      "Step 840, Reward=8.006442, Minibatch Loss= 0.1697, Training Accuracy= 0.938, Testing Acc= 0.889401, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.591127\n",
      "Step 880, Reward=7.224756, Minibatch Loss= 0.1333, Training Accuracy= 0.938, Testing Acc= 0.900922, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.814664\n",
      "Step 850, Reward=9.816172, Minibatch Loss= 0.2853, Training Accuracy= 0.891, Testing Acc= 0.917051, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.471416\n",
      "Step 890, Reward=3.8832364, Minibatch Loss= 0.2125, Training Accuracy= 0.953, Testing Acc= 0.907834, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.822143\n",
      "Step 860, Reward=11.285376, Minibatch Loss= 0.0544, Training Accuracy= 0.984, Testing Acc= 0.912442, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.542153\n",
      "Step 900, Reward=1.6755971, Minibatch Loss= 0.1298, Training Accuracy= 0.969, Testing Acc= 0.894009, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.724002\n",
      "Step 870, Reward=10.759779, Minibatch Loss= 0.1512, Training Accuracy= 0.938, Testing Acc= 0.898618, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.412679\n",
      "Step 910, Reward=4.1254644, Minibatch Loss= 0.1749, Training Accuracy= 0.945, Testing Acc= 0.910138, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.523242\n",
      "Step 880, Reward=12.729711, Minibatch Loss= 0.1085, Training Accuracy= 0.961, Testing Acc= 0.910138, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.641201\n",
      "Step 920, Reward=6.1238046, Minibatch Loss= 0.1260, Training Accuracy= 0.945, Testing Acc= 0.903226, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.676365\n",
      "Step 890, Reward=9.087356, Minibatch Loss= 0.2138, Training Accuracy= 0.906, Testing Acc= 0.889401, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.616848\n",
      "Step 930, Reward=2.6768086, Minibatch Loss= 0.1317, Training Accuracy= 0.961, Testing Acc= 0.914747, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.796330\n",
      "Step 900, Reward=7.4904575, Minibatch Loss= 0.1108, Training Accuracy= 0.945, Testing Acc= 0.907834, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.374752\n",
      "Step 940, Reward=1.1756314, Minibatch Loss= 0.1930, Training Accuracy= 0.930, Testing Acc= 0.919355, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.788811\n",
      "Step 910, Reward=11.283497, Minibatch Loss= 0.1578, Training Accuracy= 0.953, Testing Acc= 0.912442, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.395164\n",
      "Step 950, Reward=2.9328737, Minibatch Loss= 0.1347, Training Accuracy= 0.945, Testing Acc= 0.919355, Max Final Accuracy=  0.923963, Max AUC=  0.965002, Max AP=  0.959112, Max Q=  0.596705\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 920, Reward=11.374731, Minibatch Loss= 0.1453, Training Accuracy= 0.953, Testing Acc= 0.873272, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.464888\n",
      "Step 960, Reward=3.989657, Minibatch Loss= 0.2767, Training Accuracy= 0.906, Testing Acc= 0.926267, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.689742\n",
      "Step 930, Reward=10.1539345, Minibatch Loss= 0.1326, Training Accuracy= 0.969, Testing Acc= 0.905530, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.457257\n",
      "Step 970, Reward=0.06414664, Minibatch Loss= 0.1352, Training Accuracy= 0.938, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.671654\n",
      "Step 940, Reward=8.223081, Minibatch Loss= 0.1601, Training Accuracy= 0.961, Testing Acc= 0.923963, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.534923\n",
      "Step 980, Reward=5.026466, Minibatch Loss= 0.2057, Training Accuracy= 0.922, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.714562\n",
      "Step 950, Reward=11.814898, Minibatch Loss= 0.0904, Training Accuracy= 0.977, Testing Acc= 0.905530, Max Final Accuracy=  0.923963, Max AUC=  0.963983, Max AP=  0.958530, Max Q=  1.362355\n",
      "Step 990, Reward=2.7035203, Minibatch Loss= 0.2163, Training Accuracy= 0.914, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.519962\n",
      "Step 960, Reward=11.31142, Minibatch Loss= 0.1961, Training Accuracy= 0.938, Testing Acc= 0.926267, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.537469\n",
      "Step 1000, Reward=1.5043281, Minibatch Loss= 0.2329, Training Accuracy= 0.922, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.567589\n",
      "Step 970, Reward=7.0318666, Minibatch Loss= 0.0971, Training Accuracy= 0.961, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.400159\n",
      "Step 1010, Reward=0.49495637, Minibatch Loss= 0.1278, Training Accuracy= 0.953, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.779272\n",
      "Step 980, Reward=11.925537, Minibatch Loss= 0.2155, Training Accuracy= 0.914, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.606062\n",
      "Step 1020, Reward=2.3684611, Minibatch Loss= 0.1216, Training Accuracy= 0.969, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.656137\n",
      "Step 990, Reward=8.740107, Minibatch Loss= 0.1883, Training Accuracy= 0.906, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.450263\n",
      "Step 1030, Reward=3.2763991, Minibatch Loss= 0.2103, Training Accuracy= 0.938, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.618168\n",
      "Step 1000, Reward=7.747385, Minibatch Loss= 0.1959, Training Accuracy= 0.938, Testing Acc= 0.884793, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.460580\n",
      "Step 1040, Reward=3.272049, Minibatch Loss= 0.1407, Training Accuracy= 0.961, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.584567\n",
      "Step 1010, Reward=9.528393, Minibatch Loss= 0.1107, Training Accuracy= 0.961, Testing Acc= 0.923963, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.383219\n",
      "Step 1050, Reward=0.5810189, Minibatch Loss= 0.1829, Training Accuracy= 0.930, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.499597\n",
      "Step 1020, Reward=10.1694145, Minibatch Loss= 0.1231, Training Accuracy= 0.953, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.377934\n",
      "Step 1060, Reward=2.1277559, Minibatch Loss= 0.1695, Training Accuracy= 0.953, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.494900\n",
      "Step 1030, Reward=10.127204, Minibatch Loss= 0.1640, Training Accuracy= 0.938, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.394596\n",
      "Step 1070, Reward=1.9604852, Minibatch Loss= 0.1595, Training Accuracy= 0.945, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.458981\n",
      "Step 1040, Reward=10.205774, Minibatch Loss= 0.1218, Training Accuracy= 0.961, Testing Acc= 0.891705, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.414385\n",
      "Step 1080, Reward=5.978389, Minibatch Loss= 0.1088, Training Accuracy= 0.961, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.439383\n",
      "Step 1050, Reward=7.575253, Minibatch Loss= 0.1652, Training Accuracy= 0.930, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.372462\n",
      "Step 1090, Reward=5.0204363, Minibatch Loss= 0.2306, Training Accuracy= 0.930, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.513521\n",
      "Step 1060, Reward=9.484842, Minibatch Loss= 0.1449, Training Accuracy= 0.938, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.380808\n",
      "Step 1100, Reward=1.7977136, Minibatch Loss= 0.1498, Training Accuracy= 0.961, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.644862\n",
      "Step 1070, Reward=8.970307, Minibatch Loss= 0.1370, Training Accuracy= 0.961, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.230700\n",
      "Step 1110, Reward=2.8994145, Minibatch Loss= 0.1723, Training Accuracy= 0.930, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.573172\n",
      "Step 1080, Reward=13.899729, Minibatch Loss= 0.1025, Training Accuracy= 0.969, Testing Acc= 0.926267, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.358104\n",
      "Step 1120, Reward=3.603591, Minibatch Loss= 0.1607, Training Accuracy= 0.961, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.507851\n",
      "Step 1090, Reward=12.421158, Minibatch Loss= 0.1827, Training Accuracy= 0.945, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.296077\n",
      "Step 1130, Reward=5.9446917, Minibatch Loss= 0.1087, Training Accuracy= 0.961, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.555396\n",
      "Step 1100, Reward=9.001776, Minibatch Loss= 0.1753, Training Accuracy= 0.938, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.403866\n",
      "Step 1140, Reward=5.265378, Minibatch Loss= 0.1659, Training Accuracy= 0.945, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.724927\n",
      "Step 1110, Reward=8.8144865, Minibatch Loss= 0.1555, Training Accuracy= 0.945, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.462197\n",
      "Step 1150, Reward=1.7781074, Minibatch Loss= 0.1931, Training Accuracy= 0.938, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.737996\n",
      "Step 1120, Reward=9.619055, Minibatch Loss= 0.1254, Training Accuracy= 0.961, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.520043\n",
      "Step 1160, Reward=1.7834194, Minibatch Loss= 0.0803, Training Accuracy= 0.984, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.800386\n",
      "Step 1130, Reward=12.961018, Minibatch Loss= 0.1000, Training Accuracy= 0.977, Testing Acc= 0.891705, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.505810\n",
      "Step 1170, Reward=4.009314, Minibatch Loss= 0.0747, Training Accuracy= 0.961, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.613848\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1140, Reward=12.752083, Minibatch Loss= 0.1597, Training Accuracy= 0.938, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.481734\n",
      "Step 1180, Reward=2.617261, Minibatch Loss= 0.1046, Training Accuracy= 0.992, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.643148\n",
      "Step 1150, Reward=8.029838, Minibatch Loss= 0.1391, Training Accuracy= 0.930, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.463197\n",
      "Step 1190, Reward=5.0152674, Minibatch Loss= 0.1671, Training Accuracy= 0.953, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.621888\n",
      "Step 1160, Reward=8.409364, Minibatch Loss= 0.0716, Training Accuracy= 0.984, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.513901\n",
      "Step 1200, Reward=4.106942, Minibatch Loss= 0.1247, Training Accuracy= 0.953, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.595167\n",
      "Step 1210, Reward=2.5305624, Minibatch Loss= 0.0917, Training Accuracy= 0.961, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.752646\n",
      "Step 1170, Reward=11.801317, Minibatch Loss= 0.0549, Training Accuracy= 0.984, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.367084\n",
      "Step 1220, Reward=4.282164, Minibatch Loss= 0.1193, Training Accuracy= 0.969, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.544111\n",
      "Step 1180, Reward=9.102715, Minibatch Loss= 0.0993, Training Accuracy= 0.961, Testing Acc= 0.889401, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.510120\n",
      "Step 1230, Reward=2.3102536, Minibatch Loss= 0.1727, Training Accuracy= 0.938, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.548032\n",
      "Step 1190, Reward=13.257519, Minibatch Loss= 0.1341, Training Accuracy= 0.969, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.518152\n",
      "Step 1240, Reward=6.84904, Minibatch Loss= 0.1262, Training Accuracy= 0.969, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.616020\n",
      "Step 1200, Reward=12.033989, Minibatch Loss= 0.1143, Training Accuracy= 0.961, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.470147\n",
      "Step 1250, Reward=2.0875475, Minibatch Loss= 0.1458, Training Accuracy= 0.945, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.628396\n",
      "Step 1210, Reward=9.496053, Minibatch Loss= 0.0661, Training Accuracy= 0.977, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.583693\n",
      "Step 1260, Reward=3.246685, Minibatch Loss= 0.1610, Training Accuracy= 0.930, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.705325\n",
      "Step 1220, Reward=12.768483, Minibatch Loss= 0.0876, Training Accuracy= 0.969, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.406656\n",
      "Step 1270, Reward=3.1737366, Minibatch Loss= 0.1242, Training Accuracy= 0.961, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.744759\n",
      "Step 1230, Reward=9.9583025, Minibatch Loss= 0.1097, Training Accuracy= 0.961, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.551785\n",
      "Step 1280, Reward=2.3135438, Minibatch Loss= 0.1715, Training Accuracy= 0.961, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.780947\n",
      "Step 1240, Reward=14.621823, Minibatch Loss= 0.0860, Training Accuracy= 0.969, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.637519\n",
      "Step 1290, Reward=3.3143513, Minibatch Loss= 0.1329, Training Accuracy= 0.953, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.546597\n",
      "Step 1250, Reward=10.138264, Minibatch Loss= 0.1067, Training Accuracy= 0.961, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.619302\n",
      "Step 1300, Reward=4.601208, Minibatch Loss= 0.0961, Training Accuracy= 0.969, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.539231\n",
      "Step 1260, Reward=12.321676, Minibatch Loss= 0.1187, Training Accuracy= 0.961, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.686191\n",
      "Step 1310, Reward=1.7793244, Minibatch Loss= 0.1355, Training Accuracy= 0.969, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.633618\n",
      "Step 1270, Reward=11.745944, Minibatch Loss= 0.0982, Training Accuracy= 0.977, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.803198\n",
      "Step 1320, Reward=3.6507237, Minibatch Loss= 0.2041, Training Accuracy= 0.930, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.638278\n",
      "Step 1280, Reward=10.2852745, Minibatch Loss= 0.1435, Training Accuracy= 0.953, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.694481\n",
      "Step 1330, Reward=3.9800754, Minibatch Loss= 0.1155, Training Accuracy= 0.953, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.593652\n",
      "Step 1290, Reward=11.621624, Minibatch Loss= 0.1199, Training Accuracy= 0.961, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.449243\n",
      "Step 1340, Reward=6.114708, Minibatch Loss= 0.2150, Training Accuracy= 0.922, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.685785\n",
      "Step 1300, Reward=13.468363, Minibatch Loss= 0.0840, Training Accuracy= 0.977, Testing Acc= 0.921659, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.604793\n",
      "Step 1350, Reward=3.886045, Minibatch Loss= 0.1400, Training Accuracy= 0.945, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.799248\n",
      "Step 1310, Reward=10.606091, Minibatch Loss= 0.1034, Training Accuracy= 0.977, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.538114\n",
      "Step 1360, Reward=1.9927365, Minibatch Loss= 0.0741, Training Accuracy= 0.984, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.657584\n",
      "Step 1320, Reward=12.066104, Minibatch Loss= 0.1772, Training Accuracy= 0.906, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.636892\n",
      "Step 1370, Reward=3.357076, Minibatch Loss= 0.0980, Training Accuracy= 0.977, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.567138\n",
      "Step 1330, Reward=11.429793, Minibatch Loss= 0.0913, Training Accuracy= 0.969, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.562584\n",
      "Step 1380, Reward=3.7773943, Minibatch Loss= 0.3142, Training Accuracy= 0.898, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.728843\n",
      "Step 1340, Reward=13.457818, Minibatch Loss= 0.1935, Training Accuracy= 0.922, Testing Acc= 0.923963, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.628101\n",
      "Step 1390, Reward=1.570506, Minibatch Loss= 0.1705, Training Accuracy= 0.938, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.791317\n",
      "Step 1350, Reward=12.627392, Minibatch Loss= 0.1186, Training Accuracy= 0.961, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.761982\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1400, Reward=2.0095057, Minibatch Loss= 0.1609, Training Accuracy= 0.953, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.554233\n",
      "Step 1360, Reward=8.076408, Minibatch Loss= 0.0530, Training Accuracy= 0.984, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.625864\n",
      "Step 1410, Reward=1.677603, Minibatch Loss= 0.1337, Training Accuracy= 0.938, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.565248\n",
      "Step 1370, Reward=12.011539, Minibatch Loss= 0.0878, Training Accuracy= 0.977, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.571504\n",
      "Step 1420, Reward=0.27811134, Minibatch Loss= 0.0999, Training Accuracy= 0.969, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.493383\n",
      "Step 1380, Reward=13.126359, Minibatch Loss= 0.2475, Training Accuracy= 0.930, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.625084\n",
      "Step 1430, Reward=1.9589522, Minibatch Loss= 0.1341, Training Accuracy= 0.953, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.381517\n",
      "Step 1390, Reward=11.266405, Minibatch Loss= 0.1221, Training Accuracy= 0.961, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.705438\n",
      "Step 1440, Reward=4.434804, Minibatch Loss= 0.1171, Training Accuracy= 0.961, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.410440\n",
      "Step 1400, Reward=12.1150675, Minibatch Loss= 0.1370, Training Accuracy= 0.961, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.541630\n",
      "Step 1450, Reward=3.7460332, Minibatch Loss= 0.1947, Training Accuracy= 0.930, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.557686\n",
      "Step 1410, Reward=13.087598, Minibatch Loss= 0.0785, Training Accuracy= 0.977, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.626112\n",
      "Step 1460, Reward=3.1439328, Minibatch Loss= 0.1086, Training Accuracy= 0.961, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.528659\n",
      "Step 1420, Reward=9.622313, Minibatch Loss= 0.0785, Training Accuracy= 0.977, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.598462\n",
      "Step 1470, Reward=3.6627512, Minibatch Loss= 0.1514, Training Accuracy= 0.938, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.534320\n",
      "Step 1430, Reward=11.633084, Minibatch Loss= 0.1050, Training Accuracy= 0.961, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.478186\n",
      "Step 1480, Reward=3.2298918, Minibatch Loss= 0.1560, Training Accuracy= 0.930, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.614150\n",
      "Step 1440, Reward=14.0466, Minibatch Loss= 0.0532, Training Accuracy= 0.977, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.628314\n",
      "Step 1490, Reward=3.8448722, Minibatch Loss= 0.2155, Training Accuracy= 0.938, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.627415\n",
      "Step 1450, Reward=12.758067, Minibatch Loss= 0.1626, Training Accuracy= 0.938, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.866440\n",
      "Step 1500, Reward=4.490593, Minibatch Loss= 0.1403, Training Accuracy= 0.961, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.533773\n",
      "Step 1460, Reward=10.426783, Minibatch Loss= 0.0969, Training Accuracy= 0.969, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.802649\n",
      "Step 1510, Reward=0.9223311, Minibatch Loss= 0.1017, Training Accuracy= 0.961, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.540379\n",
      "Step 1470, Reward=10.5620365, Minibatch Loss= 0.1489, Training Accuracy= 0.938, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.775656\n",
      "Step 1520, Reward=6.3346887, Minibatch Loss= 0.1030, Training Accuracy= 0.961, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.692956\n",
      "Step 1480, Reward=11.200354, Minibatch Loss= 0.1014, Training Accuracy= 0.977, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.583874\n",
      "Step 1530, Reward=1.5328923, Minibatch Loss= 0.1630, Training Accuracy= 0.961, Testing Acc= 0.873272, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.577749\n",
      "Step 1490, Reward=14.142466, Minibatch Loss= 0.1736, Training Accuracy= 0.938, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.581623\n",
      "Step 1540, Reward=3.6862154, Minibatch Loss= 0.1636, Training Accuracy= 0.953, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.466995\n",
      "Step 1500, Reward=12.615408, Minibatch Loss= 0.1287, Training Accuracy= 0.953, Testing Acc= 0.870968, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.558194\n",
      "Step 1550, Reward=3.2017827, Minibatch Loss= 0.1336, Training Accuracy= 0.953, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.507874\n",
      "Step 1510, Reward=8.19214, Minibatch Loss= 0.1016, Training Accuracy= 0.953, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.592223\n",
      "Step 1560, Reward=3.32485, Minibatch Loss= 0.1080, Training Accuracy= 0.969, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.636681\n",
      "Step 1520, Reward=14.041569, Minibatch Loss= 0.0651, Training Accuracy= 0.984, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.741931\n",
      "Step 1570, Reward=5.946596, Minibatch Loss= 0.2218, Training Accuracy= 0.930, Testing Acc= 0.891705, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.634505\n",
      "Step 1530, Reward=8.798316, Minibatch Loss= 0.1365, Training Accuracy= 0.961, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.555497\n",
      "Step 1580, Reward=5.370981, Minibatch Loss= 0.2017, Training Accuracy= 0.938, Testing Acc= 0.887097, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.510057\n",
      "Step 1540, Reward=11.118968, Minibatch Loss= 0.1295, Training Accuracy= 0.961, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.601954\n",
      "Step 1590, Reward=4.360012, Minibatch Loss= 0.1372, Training Accuracy= 0.953, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.579089\n",
      "Step 1550, Reward=10.389588, Minibatch Loss= 0.1198, Training Accuracy= 0.953, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.691786\n",
      "Step 1600, Reward=3.6890528, Minibatch Loss= 0.1414, Training Accuracy= 0.961, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.599930\n",
      "Step 1560, Reward=12.012663, Minibatch Loss= 0.0801, Training Accuracy= 0.984, Testing Acc= 0.891705, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.781201\n",
      "Step 1610, Reward=4.6373754, Minibatch Loss= 0.2388, Training Accuracy= 0.930, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.713425\n",
      "Step 1570, Reward=14.219445, Minibatch Loss= 0.1676, Training Accuracy= 0.914, Testing Acc= 0.873272, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.627151\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1620, Reward=2.7198105, Minibatch Loss= 0.1184, Training Accuracy= 0.938, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.780219\n",
      "Step 1580, Reward=13.80045, Minibatch Loss= 0.1641, Training Accuracy= 0.953, Testing Acc= 0.882488, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.614285\n",
      "Step 1630, Reward=3.723312, Minibatch Loss= 0.1248, Training Accuracy= 0.945, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.758724\n",
      "Step 1590, Reward=12.168597, Minibatch Loss= 0.1111, Training Accuracy= 0.969, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.681003\n",
      "Step 1640, Reward=4.9343, Minibatch Loss= 0.1183, Training Accuracy= 0.969, Testing Acc= 0.884793, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.693583\n",
      "Step 1600, Reward=10.89833, Minibatch Loss= 0.0970, Training Accuracy= 0.969, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.723107\n",
      "Step 1650, Reward=3.6459692, Minibatch Loss= 0.0827, Training Accuracy= 0.977, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.595383\n",
      "Step 1610, Reward=12.824151, Minibatch Loss= 0.2541, Training Accuracy= 0.906, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.691775\n",
      "Step 1620, Reward=9.528759, Minibatch Loss= 0.0717, Training Accuracy= 0.977, Testing Acc= 0.889401, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.784899\n",
      "Step 1630, Reward=11.097408, Minibatch Loss= 0.0787, Training Accuracy= 0.977, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.722927\n",
      "Step 1640, Reward=12.607007, Minibatch Loss= 0.0917, Training Accuracy= 0.969, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.652920\n",
      "Step 1660, Reward=1.2238294, Minibatch Loss= 0.1480, Training Accuracy= 0.953, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.552729\n",
      "Step 1650, Reward=12.583603, Minibatch Loss= 0.0689, Training Accuracy= 0.977, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.501350\n",
      "Step 1670, Reward=5.4360805, Minibatch Loss= 0.1886, Training Accuracy= 0.930, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.563640\n",
      "Step 1660, Reward=9.706396, Minibatch Loss= 0.1026, Training Accuracy= 0.969, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.586142\n",
      "Step 1680, Reward=4.9826427, Minibatch Loss= 0.2204, Training Accuracy= 0.930, Testing Acc= 0.880184, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.626874\n",
      "Step 1670, Reward=12.495542, Minibatch Loss= 0.1562, Training Accuracy= 0.961, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.584341\n",
      "Step 1690, Reward=3.0070086, Minibatch Loss= 0.1103, Training Accuracy= 0.961, Testing Acc= 0.884793, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.646360\n",
      "Step 1680, Reward=12.798227, Minibatch Loss= 0.1653, Training Accuracy= 0.938, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.571807\n",
      "Step 1700, Reward=5.373849, Minibatch Loss= 0.0939, Training Accuracy= 0.961, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.665989\n",
      "Step 1690, Reward=10.612939, Minibatch Loss= 0.0987, Training Accuracy= 0.938, Testing Acc= 0.882488, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.590114\n",
      "Step 1710, Reward=4.6796656, Minibatch Loss= 0.1325, Training Accuracy= 0.977, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.613881\n",
      "Step 1700, Reward=12.652948, Minibatch Loss= 0.0595, Training Accuracy= 0.984, Testing Acc= 0.891705, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.509278\n",
      "Step 1720, Reward=3.9713287, Minibatch Loss= 0.1203, Training Accuracy= 0.961, Testing Acc= 0.887097, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.637993\n",
      "Step 1710, Reward=11.250774, Minibatch Loss= 0.1099, Training Accuracy= 0.977, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.544726\n",
      "Step 1730, Reward=3.6136463, Minibatch Loss= 0.0859, Training Accuracy= 0.984, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.532007\n",
      "Step 1720, Reward=10.98604, Minibatch Loss= 0.0794, Training Accuracy= 0.992, Testing Acc= 0.877880, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.544869\n",
      "Step 1740, Reward=4.5499573, Minibatch Loss= 0.0964, Training Accuracy= 0.961, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.561886\n",
      "Step 1730, Reward=11.566437, Minibatch Loss= 0.0636, Training Accuracy= 0.977, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.511246\n",
      "Step 1750, Reward=5.7574844, Minibatch Loss= 0.0888, Training Accuracy= 0.977, Testing Acc= 0.891705, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.585678\n",
      "Step 1740, Reward=10.86515, Minibatch Loss= 0.0684, Training Accuracy= 0.977, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.465298\n",
      "Step 1750, Reward=13.0121, Minibatch Loss= 0.0737, Training Accuracy= 0.984, Testing Acc= 0.887097, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.521159\n",
      "Step 1760, Reward=13.751911, Minibatch Loss= 0.0948, Training Accuracy= 0.969, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.558159\n",
      "Step 1760, Reward=5.3300467, Minibatch Loss= 0.1070, Training Accuracy= 0.938, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.631310\n",
      "Step 1770, Reward=10.728107, Minibatch Loss= 0.1441, Training Accuracy= 0.953, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.658837\n",
      "Step 1770, Reward=2.311026, Minibatch Loss= 0.1852, Training Accuracy= 0.953, Testing Acc= 0.880184, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.721676\n",
      "Step 1780, Reward=11.479638, Minibatch Loss= 0.1056, Training Accuracy= 0.953, Testing Acc= 0.921659, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.703090\n",
      "Step 1780, Reward=4.0940413, Minibatch Loss= 0.1422, Training Accuracy= 0.953, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.624414\n",
      "Step 1790, Reward=13.650891, Minibatch Loss= 0.0925, Training Accuracy= 0.961, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.530439\n",
      "Step 1790, Reward=5.3859673, Minibatch Loss= 0.1711, Training Accuracy= 0.930, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.535377\n",
      "Step 1800, Reward=12.503382, Minibatch Loss= 0.0970, Training Accuracy= 0.969, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.606426\n",
      "Step 1800, Reward=4.3048882, Minibatch Loss= 0.1316, Training Accuracy= 0.945, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.635214\n",
      "Step 1810, Reward=9.912268, Minibatch Loss= 0.1059, Training Accuracy= 0.961, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.952548\n",
      "Step 1810, Reward=1.9327633, Minibatch Loss= 0.1078, Training Accuracy= 0.961, Testing Acc= 0.891705, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.757105\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1820, Reward=11.118588, Minibatch Loss= 0.0815, Training Accuracy= 0.969, Testing Acc= 0.889401, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.623651\n",
      "Step 1820, Reward=3.0201366, Minibatch Loss= 0.1046, Training Accuracy= 0.961, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.559477\n",
      "Step 1830, Reward=10.587369, Minibatch Loss= 0.0578, Training Accuracy= 0.977, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.631202\n",
      "Step 1840, Reward=11.1605835, Minibatch Loss= 0.1020, Training Accuracy= 0.945, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.726801\n",
      "Step 1850, Reward=11.04893, Minibatch Loss= 0.1376, Training Accuracy= 0.945, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.521070\n",
      "Step 1830, Reward=3.187736, Minibatch Loss= 0.0699, Training Accuracy= 0.984, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.557565\n",
      "Step 1860, Reward=11.680132, Minibatch Loss= 0.0369, Training Accuracy= 0.992, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.497300\n",
      "Step 1840, Reward=3.4841905, Minibatch Loss= 0.1318, Training Accuracy= 0.938, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.742147\n",
      "Step 1870, Reward=15.31543, Minibatch Loss= 0.0882, Training Accuracy= 0.969, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.726913\n",
      "Step 1850, Reward=2.2334387, Minibatch Loss= 0.1813, Training Accuracy= 0.930, Testing Acc= 0.891705, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.499110\n",
      "Step 1860, Reward=4.121802, Minibatch Loss= 0.0679, Training Accuracy= 0.984, Testing Acc= 0.891705, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.385060\n",
      "Step 1880, Reward=10.014084, Minibatch Loss= 0.1235, Training Accuracy= 0.953, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.734398\n",
      "Step 1870, Reward=7.129751, Minibatch Loss= 0.1221, Training Accuracy= 0.961, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.534984\n",
      "Step 1890, Reward=10.527999, Minibatch Loss= 0.1049, Training Accuracy= 0.961, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.582185\n",
      "Step 1880, Reward=2.5353494, Minibatch Loss= 0.1463, Training Accuracy= 0.953, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.638811\n",
      "Step 1900, Reward=10.76269, Minibatch Loss= 0.0581, Training Accuracy= 0.984, Testing Acc= 0.887097, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.595252\n",
      "Step 1890, Reward=2.344086, Minibatch Loss= 0.1209, Training Accuracy= 0.969, Testing Acc= 0.891705, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.570033\n",
      "Step 1910, Reward=12.253191, Minibatch Loss= 0.1112, Training Accuracy= 0.961, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.660286\n",
      "Step 1900, Reward=2.7490144, Minibatch Loss= 0.0714, Training Accuracy= 0.984, Testing Acc= 0.887097, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.515728\n",
      "Step 1920, Reward=11.174599, Minibatch Loss= 0.0368, Training Accuracy= 1.000, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.539744\n",
      "Step 1910, Reward=4.950663, Minibatch Loss= 0.1642, Training Accuracy= 0.945, Testing Acc= 0.889401, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.617362\n",
      "Step 1930, Reward=14.003605, Minibatch Loss= 0.0359, Training Accuracy= 0.992, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.580986\n",
      "Step 1920, Reward=2.733572, Minibatch Loss= 0.0480, Training Accuracy= 0.992, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.572730\n",
      "Step 1930, Reward=5.41506, Minibatch Loss= 0.0504, Training Accuracy= 0.992, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.535161\n",
      "Step 1940, Reward=4.3355527, Minibatch Loss= 0.0913, Training Accuracy= 0.961, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.593338\n",
      "Step 1950, Reward=0.8320547, Minibatch Loss= 0.0897, Training Accuracy= 0.984, Testing Acc= 0.887097, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.642193\n",
      "Step 1960, Reward=5.612402, Minibatch Loss= 0.0735, Training Accuracy= 0.984, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.616531\n",
      "Step 1940, Reward=12.264156, Minibatch Loss= 0.0782, Training Accuracy= 0.977, Testing Acc= 0.921659, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.643089\n",
      "Step 1970, Reward=3.9744403, Minibatch Loss= 0.1353, Training Accuracy= 0.953, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.493581\n",
      "Step 1950, Reward=8.732045, Minibatch Loss= 0.0898, Training Accuracy= 0.969, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.750856\n",
      "Step 1980, Reward=2.5771704, Minibatch Loss= 0.1467, Training Accuracy= 0.961, Testing Acc= 0.887097, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.538476\n",
      "Step 1960, Reward=14.225879, Minibatch Loss= 0.0646, Training Accuracy= 0.977, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.649969\n",
      "Step 1990, Reward=6.7179785, Minibatch Loss= 0.0889, Training Accuracy= 0.961, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.960819, Max AP=  0.953220, Max Q=  0.590105\n",
      "Step 1970, Reward=12.522845, Minibatch Loss= 0.1328, Training Accuracy= 0.953, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.591210\n",
      "Optimization Finished!\n",
      "Testing Accuracy: 0.92626727\n",
      "Step 1980, Reward=10.687826, Minibatch Loss= 0.1283, Training Accuracy= 0.953, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.685161\n",
      "Step 1990, Reward=15.3884115, Minibatch Loss= 0.0606, Training Accuracy= 0.977, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.961010, Max AP=  0.957590, Max Q=  1.634937\n",
      "Optimization Finished!\n",
      "Testing Accuracy: 0.92626727\n",
      "Step 10, Reward=-33.683224, Minibatch Loss= 0.2969, Training Accuracy= 0.906, Testing Acc= 0.866359, Max Final Accuracy=  0.866359, Max AUC=  0.902504, Max AP=  0.818460, Max Q=  -0.231734\n",
      "Step 20, Reward=-10.263526, Minibatch Loss= 0.3804, Training Accuracy= 0.852, Testing Acc= 0.868664, Max Final Accuracy=  0.868664, Max AUC=  0.936673, Max AP=  0.917916, Max Q=  -0.143786\n",
      "Step 10, Reward=-34.97623, Minibatch Loss= 0.2612, Training Accuracy= 0.898, Testing Acc= 0.868664, Max Final Accuracy=  0.868664, Max AUC=  0.903799, Max AP=  0.838904, Max Q=  -0.211756\n",
      "Step 30, Reward=-6.606614, Minibatch Loss= 0.3285, Training Accuracy= 0.859, Testing Acc= 0.873272, Max Final Accuracy=  0.873272, Max AUC=  0.945529, Max AP=  0.931268, Max Q=  -0.073433\n",
      "Step 20, Reward=-7.8245134, Minibatch Loss= 0.3394, Training Accuracy= 0.836, Testing Acc= 0.870968, Max Final Accuracy=  0.870968, Max AUC=  0.934019, Max AP=  0.916141, Max Q=  -0.166006\n",
      "Step 40, Reward=-9.500265, Minibatch Loss= 0.2783, Training Accuracy= 0.898, Testing Acc= 0.887097, Max Final Accuracy=  0.887097, Max AUC=  0.940729, Max AP=  0.925354, Max Q=  -0.211376\n",
      "Step 30, Reward=-8.979035, Minibatch Loss= 0.3941, Training Accuracy= 0.828, Testing Acc= 0.870968, Max Final Accuracy=  0.870968, Max AUC=  0.934019, Max AP=  0.916141, Max Q=  -0.063710\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 50, Reward=-6.5740433, Minibatch Loss= 0.2982, Training Accuracy= 0.852, Testing Acc= 0.889401, Max Final Accuracy=  0.889401, Max AUC=  0.947249, Max AP=  0.936760, Max Q=  -0.432608\n",
      "Step 40, Reward=-5.500559, Minibatch Loss= 0.3097, Training Accuracy= 0.891, Testing Acc= 0.870968, Max Final Accuracy=  0.870968, Max AUC=  0.934019, Max AP=  0.916141, Max Q=  -0.189462\n",
      "Step 60, Reward=-3.3686233, Minibatch Loss= 0.2160, Training Accuracy= 0.922, Testing Acc= 0.889401, Max Final Accuracy=  0.889401, Max AUC=  0.947249, Max AP=  0.936760, Max Q=  -0.444078\n",
      "Step 50, Reward=-5.8517723, Minibatch Loss= 0.2456, Training Accuracy= 0.898, Testing Acc= 0.873272, Max Final Accuracy=  0.873272, Max AUC=  0.948990, Max AP=  0.936745, Max Q=  -0.428627\n",
      "Step 70, Reward=-4.0211067, Minibatch Loss= 0.3126, Training Accuracy= 0.883, Testing Acc= 0.887097, Max Final Accuracy=  0.889401, Max AUC=  0.947249, Max AP=  0.936760, Max Q=  -0.415866\n",
      "Step 60, Reward=-6.042875, Minibatch Loss= 0.3068, Training Accuracy= 0.859, Testing Acc= 0.887097, Max Final Accuracy=  0.887097, Max AUC=  0.946675, Max AP=  0.933707, Max Q=  -0.513220\n",
      "Step 80, Reward=-3.1822693, Minibatch Loss= 0.3031, Training Accuracy= 0.883, Testing Acc= 0.889401, Max Final Accuracy=  0.889401, Max AUC=  0.947249, Max AP=  0.936760, Max Q=  -0.349632\n",
      "Step 70, Reward=-5.542115, Minibatch Loss= 0.3774, Training Accuracy= 0.836, Testing Acc= 0.884793, Max Final Accuracy=  0.887097, Max AUC=  0.946675, Max AP=  0.933707, Max Q=  -0.332041\n",
      "Step 90, Reward=-4.6205807, Minibatch Loss= 0.2594, Training Accuracy= 0.891, Testing Acc= 0.889401, Max Final Accuracy=  0.889401, Max AUC=  0.947249, Max AP=  0.936760, Max Q=  -0.493976\n",
      "Step 80, Reward=-4.1480513, Minibatch Loss= 0.2910, Training Accuracy= 0.898, Testing Acc= 0.875576, Max Final Accuracy=  0.887097, Max AUC=  0.946675, Max AP=  0.933707, Max Q=  -0.391349\n",
      "Step 100, Reward=-3.283381, Minibatch Loss= 0.2786, Training Accuracy= 0.883, Testing Acc= 0.894009, Max Final Accuracy=  0.894009, Max AUC=  0.956274, Max AP=  0.951272, Max Q=  -0.317015\n",
      "Step 90, Reward=-5.0693755, Minibatch Loss= 0.2240, Training Accuracy= 0.906, Testing Acc= 0.880184, Max Final Accuracy=  0.887097, Max AUC=  0.946675, Max AP=  0.933707, Max Q=  -0.530024\n",
      "Step 110, Reward=-1.4350965, Minibatch Loss= 0.3185, Training Accuracy= 0.875, Testing Acc= 0.889401, Max Final Accuracy=  0.894009, Max AUC=  0.956274, Max AP=  0.951272, Max Q=  -0.296529\n",
      "Step 100, Reward=-0.62329257, Minibatch Loss= 0.1849, Training Accuracy= 0.945, Testing Acc= 0.894009, Max Final Accuracy=  0.894009, Max AUC=  0.958780, Max AP=  0.953172, Max Q=  -0.470588\n",
      "Step 120, Reward=-2.1547582, Minibatch Loss= 0.3154, Training Accuracy= 0.883, Testing Acc= 0.891705, Max Final Accuracy=  0.894009, Max AUC=  0.956274, Max AP=  0.951272, Max Q=  -0.254282\n",
      "Step 110, Reward=-0.2763654, Minibatch Loss= 0.2428, Training Accuracy= 0.922, Testing Acc= 0.894009, Max Final Accuracy=  0.894009, Max AUC=  0.958780, Max AP=  0.953172, Max Q=  -0.313468\n",
      "Step 130, Reward=-5.087034, Minibatch Loss= 0.2779, Training Accuracy= 0.898, Testing Acc= 0.882488, Max Final Accuracy=  0.894009, Max AUC=  0.956274, Max AP=  0.951272, Max Q=  -0.199445\n",
      "Step 120, Reward=1.3794421, Minibatch Loss= 0.2818, Training Accuracy= 0.867, Testing Acc= 0.896313, Max Final Accuracy=  0.896313, Max AUC=  0.961307, Max AP=  0.955157, Max Q=  -0.269969\n",
      "Step 140, Reward=-2.5974464, Minibatch Loss= 0.2804, Training Accuracy= 0.906, Testing Acc= 0.894009, Max Final Accuracy=  0.894009, Max AUC=  0.956274, Max AP=  0.951272, Max Q=  -0.186321\n",
      "Step 130, Reward=-3.4574356, Minibatch Loss= 0.2857, Training Accuracy= 0.891, Testing Acc= 0.903226, Max Final Accuracy=  0.903226, Max AUC=  0.962879, Max AP=  0.956397, Max Q=  -0.199205\n",
      "Step 150, Reward=0.19144273, Minibatch Loss= 0.3011, Training Accuracy= 0.867, Testing Acc= 0.900922, Max Final Accuracy=  0.900922, Max AUC=  0.962730, Max AP=  0.958667, Max Q=  -0.160898\n",
      "Step 140, Reward=-3.9998345, Minibatch Loss= 0.2395, Training Accuracy= 0.938, Testing Acc= 0.896313, Max Final Accuracy=  0.903226, Max AUC=  0.962879, Max AP=  0.956397, Max Q=  -0.110809\n",
      "Step 160, Reward=-3.4083912, Minibatch Loss= 0.2697, Training Accuracy= 0.883, Testing Acc= 0.896313, Max Final Accuracy=  0.900922, Max AUC=  0.962730, Max AP=  0.958667, Max Q=  -0.155061\n",
      "Step 150, Reward=-1.5255361, Minibatch Loss= 0.2761, Training Accuracy= 0.898, Testing Acc= 0.882488, Max Final Accuracy=  0.903226, Max AUC=  0.962879, Max AP=  0.956397, Max Q=  -0.088368\n",
      "Step 170, Reward=-0.073967695, Minibatch Loss= 0.2469, Training Accuracy= 0.891, Testing Acc= 0.894009, Max Final Accuracy=  0.900922, Max AUC=  0.962730, Max AP=  0.958667, Max Q=  -0.133221\n",
      "Step 160, Reward=-4.1759186, Minibatch Loss= 0.2445, Training Accuracy= 0.898, Testing Acc= 0.898618, Max Final Accuracy=  0.903226, Max AUC=  0.962879, Max AP=  0.956397, Max Q=  -0.094587\n",
      "Step 180, Reward=1.246662, Minibatch Loss= 0.2494, Training Accuracy= 0.922, Testing Acc= 0.907834, Max Final Accuracy=  0.907834, Max AUC=  0.963622, Max AP=  0.958697, Max Q=  -0.139688\n",
      "Step 170, Reward=-4.441348, Minibatch Loss= 0.3220, Training Accuracy= 0.859, Testing Acc= 0.896313, Max Final Accuracy=  0.903226, Max AUC=  0.962879, Max AP=  0.956397, Max Q=  -0.101439\n",
      "Step 190, Reward=1.4876411, Minibatch Loss= 0.2695, Training Accuracy= 0.914, Testing Acc= 0.898618, Max Final Accuracy=  0.907834, Max AUC=  0.963622, Max AP=  0.958697, Max Q=  -0.077605\n",
      "Step 180, Reward=-3.2766006, Minibatch Loss= 0.2627, Training Accuracy= 0.906, Testing Acc= 0.912442, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.134979\n",
      "Step 200, Reward=2.7855332, Minibatch Loss= 0.2217, Training Accuracy= 0.898, Testing Acc= 0.898618, Max Final Accuracy=  0.907834, Max AUC=  0.963622, Max AP=  0.958697, Max Q=  -0.045891\n",
      "Step 190, Reward=1.0294096, Minibatch Loss= 0.2040, Training Accuracy= 0.930, Testing Acc= 0.887097, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.112746\n",
      "Step 210, Reward=-4.581682, Minibatch Loss= 0.2659, Training Accuracy= 0.898, Testing Acc= 0.896313, Max Final Accuracy=  0.907834, Max AUC=  0.963622, Max AP=  0.958697, Max Q=  -0.020939\n",
      "Step 200, Reward=-3.332241, Minibatch Loss= 0.2504, Training Accuracy= 0.883, Testing Acc= 0.884793, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.102155\n",
      "Step 220, Reward=3.4477816, Minibatch Loss= 0.2369, Training Accuracy= 0.922, Testing Acc= 0.889401, Max Final Accuracy=  0.907834, Max AUC=  0.963622, Max AP=  0.958697, Max Q=  -0.015516\n",
      "Step 210, Reward=-3.6451068, Minibatch Loss= 0.2044, Training Accuracy= 0.930, Testing Acc= 0.898618, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.106519\n",
      "Step 230, Reward=-1.2280941, Minibatch Loss= 0.2318, Training Accuracy= 0.914, Testing Acc= 0.900922, Max Final Accuracy=  0.907834, Max AUC=  0.963622, Max AP=  0.958697, Max Q=  0.018898\n",
      "Step 220, Reward=-1.1017554, Minibatch Loss= 0.2377, Training Accuracy= 0.891, Testing Acc= 0.896313, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.102749\n",
      "Step 240, Reward=3.1125355, Minibatch Loss= 0.1603, Training Accuracy= 0.930, Testing Acc= 0.896313, Max Final Accuracy=  0.907834, Max AUC=  0.963622, Max AP=  0.958697, Max Q=  0.023626\n",
      "Step 230, Reward=-0.6653609, Minibatch Loss= 0.2957, Training Accuracy= 0.898, Testing Acc= 0.900922, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.067137\n",
      "Step 250, Reward=-2.0518396, Minibatch Loss= 0.1968, Training Accuracy= 0.945, Testing Acc= 0.905530, Max Final Accuracy=  0.907834, Max AUC=  0.963622, Max AP=  0.958697, Max Q=  0.027934\n",
      "Step 240, Reward=-3.9103873, Minibatch Loss= 0.3117, Training Accuracy= 0.852, Testing Acc= 0.891705, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.111581\n",
      "Step 260, Reward=1.6514282, Minibatch Loss= 0.2133, Training Accuracy= 0.930, Testing Acc= 0.917051, Max Final Accuracy=  0.917051, Max AUC=  0.963665, Max AP=  0.958898, Max Q=  0.038900\n",
      "Step 250, Reward=-4.5707073, Minibatch Loss= 0.2408, Training Accuracy= 0.906, Testing Acc= 0.891705, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.089256\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 270, Reward=-0.9098439, Minibatch Loss= 0.2109, Training Accuracy= 0.922, Testing Acc= 0.910138, Max Final Accuracy=  0.917051, Max AUC=  0.963665, Max AP=  0.958898, Max Q=  0.059381\n",
      "Step 260, Reward=-4.8475246, Minibatch Loss= 0.1769, Training Accuracy= 0.938, Testing Acc= 0.887097, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.103964\n",
      "Step 280, Reward=-1.3880252, Minibatch Loss= 0.2050, Training Accuracy= 0.914, Testing Acc= 0.921659, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.055415\n",
      "Step 270, Reward=-1.6000565, Minibatch Loss= 0.2438, Training Accuracy= 0.883, Testing Acc= 0.891705, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.073588\n",
      "Step 290, Reward=4.811658, Minibatch Loss= 0.2684, Training Accuracy= 0.891, Testing Acc= 0.903226, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.073707\n",
      "Step 280, Reward=-2.8331556, Minibatch Loss= 0.2072, Training Accuracy= 0.898, Testing Acc= 0.905530, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.066023\n",
      "Step 300, Reward=0.64915586, Minibatch Loss= 0.1890, Training Accuracy= 0.906, Testing Acc= 0.896313, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.081630\n",
      "Step 290, Reward=-0.9129633, Minibatch Loss= 0.2274, Training Accuracy= 0.898, Testing Acc= 0.903226, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.114450\n",
      "Step 310, Reward=-4.642416, Minibatch Loss= 0.2143, Training Accuracy= 0.914, Testing Acc= 0.903226, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.129079\n",
      "Step 300, Reward=-1.5414102, Minibatch Loss= 0.2990, Training Accuracy= 0.844, Testing Acc= 0.898618, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.099647\n",
      "Step 320, Reward=0.6253475, Minibatch Loss= 0.2297, Training Accuracy= 0.922, Testing Acc= 0.905530, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.157639\n",
      "Step 310, Reward=-4.3320713, Minibatch Loss= 0.3106, Training Accuracy= 0.867, Testing Acc= 0.889401, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.088533\n",
      "Step 330, Reward=-3.4663808, Minibatch Loss= 0.2363, Training Accuracy= 0.906, Testing Acc= 0.894009, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.208247\n",
      "Step 320, Reward=1.1518425, Minibatch Loss= 0.1428, Training Accuracy= 0.945, Testing Acc= 0.900922, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.099195\n",
      "Step 340, Reward=-1.9901472, Minibatch Loss= 0.2117, Training Accuracy= 0.906, Testing Acc= 0.912442, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.329785\n",
      "Step 330, Reward=-2.547811, Minibatch Loss= 0.2331, Training Accuracy= 0.906, Testing Acc= 0.903226, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.120119\n",
      "Step 350, Reward=1.2648373, Minibatch Loss= 0.2448, Training Accuracy= 0.914, Testing Acc= 0.910138, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.393820\n",
      "Step 340, Reward=0.8277198, Minibatch Loss= 0.2271, Training Accuracy= 0.922, Testing Acc= 0.905530, Max Final Accuracy=  0.912442, Max AUC=  0.962475, Max AP=  0.957754, Max Q=  -0.065031\n",
      "Step 350, Reward=0.37828553, Minibatch Loss= 0.1954, Training Accuracy= 0.930, Testing Acc= 0.919355, Max Final Accuracy=  0.919355, Max AUC=  0.961732, Max AP=  0.956740, Max Q=  -0.046426\n",
      "Step 360, Reward=-1.2326338, Minibatch Loss= 0.2427, Training Accuracy= 0.906, Testing Acc= 0.914747, Max Final Accuracy=  0.919355, Max AUC=  0.961732, Max AP=  0.956740, Max Q=  -0.045422\n",
      "Step 370, Reward=-2.4134808, Minibatch Loss= 0.1542, Training Accuracy= 0.945, Testing Acc= 0.910138, Max Final Accuracy=  0.919355, Max AUC=  0.961732, Max AP=  0.956740, Max Q=  -0.025416\n",
      "Step 380, Reward=2.382894, Minibatch Loss= 0.2401, Training Accuracy= 0.938, Testing Acc= 0.903226, Max Final Accuracy=  0.919355, Max AUC=  0.961732, Max AP=  0.956740, Max Q=  0.000367\n",
      "Step 360, Reward=0.18400669, Minibatch Loss= 0.2217, Training Accuracy= 0.938, Testing Acc= 0.896313, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.280962\n",
      "Step 390, Reward=-0.48727965, Minibatch Loss= 0.2680, Training Accuracy= 0.906, Testing Acc= 0.912442, Max Final Accuracy=  0.919355, Max AUC=  0.961732, Max AP=  0.956740, Max Q=  0.017071\n",
      "Step 370, Reward=2.5820122, Minibatch Loss= 0.3005, Training Accuracy= 0.859, Testing Acc= 0.903226, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.083322\n",
      "Step 400, Reward=0.81920457, Minibatch Loss= 0.2096, Training Accuracy= 0.914, Testing Acc= 0.926267, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.039589\n",
      "Step 380, Reward=1.5871136, Minibatch Loss= 0.1980, Training Accuracy= 0.914, Testing Acc= 0.919355, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.137710\n",
      "Step 410, Reward=-1.4228668, Minibatch Loss= 0.2041, Training Accuracy= 0.914, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.057009\n",
      "Step 390, Reward=0.0075343847, Minibatch Loss= 0.1877, Training Accuracy= 0.930, Testing Acc= 0.912442, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.214017\n",
      "Step 420, Reward=-0.96828663, Minibatch Loss= 0.2483, Training Accuracy= 0.891, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.075138\n",
      "Step 400, Reward=2.882726, Minibatch Loss= 0.2269, Training Accuracy= 0.922, Testing Acc= 0.910138, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.188321\n",
      "Step 430, Reward=-3.0417323, Minibatch Loss= 0.1990, Training Accuracy= 0.930, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.068346\n",
      "Step 410, Reward=-0.114369154, Minibatch Loss= 0.2577, Training Accuracy= 0.883, Testing Acc= 0.917051, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.192687\n",
      "Step 440, Reward=0.72901535, Minibatch Loss= 0.2351, Training Accuracy= 0.898, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.005531\n",
      "Step 420, Reward=2.7807734, Minibatch Loss= 0.2632, Training Accuracy= 0.914, Testing Acc= 0.912442, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.265312\n",
      "Step 450, Reward=-2.3233662, Minibatch Loss= 0.1713, Training Accuracy= 0.938, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.006252\n",
      "Step 430, Reward=4.5267277, Minibatch Loss= 0.1180, Training Accuracy= 0.953, Testing Acc= 0.907834, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.391398\n",
      "Step 460, Reward=0.87420964, Minibatch Loss= 0.1171, Training Accuracy= 0.961, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.022333\n",
      "Step 440, Reward=1.1694498, Minibatch Loss= 0.2552, Training Accuracy= 0.891, Testing Acc= 0.907834, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.377371\n",
      "Step 470, Reward=0.37523365, Minibatch Loss= 0.1547, Training Accuracy= 0.961, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.005004\n",
      "Step 450, Reward=1.2268506, Minibatch Loss= 0.1635, Training Accuracy= 0.945, Testing Acc= 0.912442, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.347974\n",
      "Step 480, Reward=-2.3362498, Minibatch Loss= 0.1688, Training Accuracy= 0.938, Testing Acc= 0.923963, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  -0.005035\n",
      "Step 460, Reward=4.304874, Minibatch Loss= 0.1520, Training Accuracy= 0.945, Testing Acc= 0.891705, Max Final Accuracy=  0.921659, Max AUC=  0.965066, Max AP=  0.960540, Max Q=  0.499044\n",
      "Step 490, Reward=3.1592977, Minibatch Loss= 0.2241, Training Accuracy= 0.938, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.033609\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 470, Reward=5.6248827, Minibatch Loss= 0.1691, Training Accuracy= 0.945, Testing Acc= 0.923963, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  0.484677\n",
      "Step 500, Reward=-0.82693005, Minibatch Loss= 0.1728, Training Accuracy= 0.945, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.106753\n",
      "Step 480, Reward=3.188064, Minibatch Loss= 0.2196, Training Accuracy= 0.930, Testing Acc= 0.910138, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  0.543243\n",
      "Step 510, Reward=-1.2216014, Minibatch Loss= 0.2149, Training Accuracy= 0.938, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.108070\n",
      "Step 490, Reward=2.2782893, Minibatch Loss= 0.1441, Training Accuracy= 0.945, Testing Acc= 0.910138, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  0.731230\n",
      "Step 520, Reward=0.594321, Minibatch Loss= 0.1632, Training Accuracy= 0.945, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.086865\n",
      "Step 500, Reward=4.856189, Minibatch Loss= 0.2466, Training Accuracy= 0.914, Testing Acc= 0.889401, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  0.562381\n",
      "Step 530, Reward=1.9200746, Minibatch Loss= 0.1881, Training Accuracy= 0.930, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.103046\n",
      "Step 510, Reward=-0.38668346, Minibatch Loss= 0.2081, Training Accuracy= 0.930, Testing Acc= 0.903226, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  0.632863\n",
      "Step 540, Reward=-1.8893516, Minibatch Loss= 0.2269, Training Accuracy= 0.914, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.120151\n",
      "Step 520, Reward=1.9311483, Minibatch Loss= 0.1998, Training Accuracy= 0.922, Testing Acc= 0.903226, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  0.683975\n",
      "Step 550, Reward=-0.3010862, Minibatch Loss= 0.1513, Training Accuracy= 0.938, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.162178\n",
      "Step 530, Reward=-0.6239507, Minibatch Loss= 0.2346, Training Accuracy= 0.898, Testing Acc= 0.912442, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  0.635595\n",
      "Step 560, Reward=0.43711698, Minibatch Loss= 0.1263, Training Accuracy= 0.945, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.172822\n",
      "Step 540, Reward=4.253129, Minibatch Loss= 0.2857, Training Accuracy= 0.883, Testing Acc= 0.923963, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  0.564749\n",
      "Step 570, Reward=3.835616, Minibatch Loss= 0.2186, Training Accuracy= 0.906, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.084507\n",
      "Step 550, Reward=4.853957, Minibatch Loss= 0.1903, Training Accuracy= 0.938, Testing Acc= 0.914747, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  0.543522\n",
      "Step 580, Reward=-1.5067413, Minibatch Loss= 0.1943, Training Accuracy= 0.930, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.108101\n",
      "Step 560, Reward=2.529453, Minibatch Loss= 0.2155, Training Accuracy= 0.914, Testing Acc= 0.910138, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  0.605200\n",
      "Step 570, Reward=3.3405693, Minibatch Loss= 0.2270, Training Accuracy= 0.898, Testing Acc= 0.919355, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  0.928230\n",
      "Step 590, Reward=2.5300412, Minibatch Loss= 0.1662, Training Accuracy= 0.953, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.160077\n",
      "Step 580, Reward=0.7862855, Minibatch Loss= 0.1883, Training Accuracy= 0.930, Testing Acc= 0.912442, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  0.998056\n",
      "Step 600, Reward=-2.413907, Minibatch Loss= 0.1374, Training Accuracy= 0.977, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.175049\n",
      "Step 590, Reward=-1.9122458, Minibatch Loss= 0.1322, Training Accuracy= 0.961, Testing Acc= 0.905530, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  1.177001\n",
      "Step 610, Reward=1.5070553, Minibatch Loss= 0.0899, Training Accuracy= 0.984, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.196870\n",
      "Step 600, Reward=4.3065224, Minibatch Loss= 0.2033, Training Accuracy= 0.922, Testing Acc= 0.917051, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  1.228397\n",
      "Step 620, Reward=-0.4539026, Minibatch Loss= 0.1241, Training Accuracy= 0.953, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.207668\n",
      "Step 610, Reward=2.718826, Minibatch Loss= 0.2200, Training Accuracy= 0.938, Testing Acc= 0.889401, Max Final Accuracy=  0.923963, Max AUC=  0.965724, Max AP=  0.961284, Max Q=  1.252381\n",
      "Step 630, Reward=0.09179556, Minibatch Loss= 0.2100, Training Accuracy= 0.922, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.394415\n",
      "Step 620, Reward=0.15551329, Minibatch Loss= 0.2180, Training Accuracy= 0.914, Testing Acc= 0.926267, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.147937\n",
      "Step 640, Reward=0.9372617, Minibatch Loss= 0.2086, Training Accuracy= 0.938, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.289631\n",
      "Step 630, Reward=0.21972132, Minibatch Loss= 0.1918, Training Accuracy= 0.930, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.862008\n",
      "Step 640, Reward=4.872283, Minibatch Loss= 0.1925, Training Accuracy= 0.953, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.546485\n",
      "Step 650, Reward=-1.6515415, Minibatch Loss= 0.1476, Training Accuracy= 0.938, Testing Acc= 0.921659, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.442545\n",
      "Step 660, Reward=5.4664545, Minibatch Loss= 0.1917, Training Accuracy= 0.945, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.372516\n",
      "Step 670, Reward=0.5414851, Minibatch Loss= 0.1257, Training Accuracy= 0.969, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.423122\n",
      "Step 650, Reward=2.1610212, Minibatch Loss= 0.0686, Training Accuracy= 0.984, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.217993\n",
      "Step 680, Reward=-1.288987, Minibatch Loss= 0.2011, Training Accuracy= 0.922, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.572549\n",
      "Step 660, Reward=4.950811, Minibatch Loss= 0.1694, Training Accuracy= 0.953, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.203078\n",
      "Step 690, Reward=3.186489, Minibatch Loss= 0.1570, Training Accuracy= 0.930, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.690328\n",
      "Step 670, Reward=-0.7188959, Minibatch Loss= 0.1595, Training Accuracy= 0.938, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.216343\n",
      "Step 700, Reward=2.898808, Minibatch Loss= 0.1772, Training Accuracy= 0.930, Testing Acc= 0.919355, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.540662\n",
      "Step 680, Reward=0.9216472, Minibatch Loss= 0.2256, Training Accuracy= 0.922, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.284157\n",
      "Step 710, Reward=3.7115972, Minibatch Loss= 0.2503, Training Accuracy= 0.898, Testing Acc= 0.882488, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.670990\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 690, Reward=-0.7826214, Minibatch Loss= 0.2199, Training Accuracy= 0.906, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.489837\n",
      "Step 720, Reward=2.006227, Minibatch Loss= 0.1486, Training Accuracy= 0.953, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.579972\n",
      "Step 700, Reward=-0.45356607, Minibatch Loss= 0.2312, Training Accuracy= 0.922, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.529780\n",
      "Step 730, Reward=2.56364, Minibatch Loss= 0.1899, Training Accuracy= 0.930, Testing Acc= 0.889401, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.612222\n",
      "Step 710, Reward=1.5290223, Minibatch Loss= 0.1954, Training Accuracy= 0.930, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.392968\n",
      "Step 740, Reward=-0.45969522, Minibatch Loss= 0.2038, Training Accuracy= 0.930, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.672082\n",
      "Step 720, Reward=1.9777719, Minibatch Loss= 0.2177, Training Accuracy= 0.930, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.140542\n",
      "Step 730, Reward=1.8635914, Minibatch Loss= 0.1774, Training Accuracy= 0.945, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.176774\n",
      "Step 750, Reward=3.81748, Minibatch Loss= 0.1992, Training Accuracy= 0.922, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.604209\n",
      "Step 760, Reward=4.6269026, Minibatch Loss= 0.2179, Training Accuracy= 0.906, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.494730\n",
      "Step 740, Reward=3.2070966, Minibatch Loss= 0.2060, Training Accuracy= 0.930, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.250042\n",
      "Step 770, Reward=2.871748, Minibatch Loss= 0.2047, Training Accuracy= 0.930, Testing Acc= 0.921659, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.729965\n",
      "Step 750, Reward=3.5662696, Minibatch Loss= 0.0698, Training Accuracy= 0.977, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.234767\n",
      "Step 780, Reward=3.0109038, Minibatch Loss= 0.1935, Training Accuracy= 0.922, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.580897\n",
      "Step 760, Reward=5.4519196, Minibatch Loss= 0.1668, Training Accuracy= 0.938, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.251547\n",
      "Step 790, Reward=4.1750913, Minibatch Loss= 0.1019, Training Accuracy= 0.953, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.624269\n",
      "Step 770, Reward=6.250387, Minibatch Loss= 0.2865, Training Accuracy= 0.883, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.349474\n",
      "Step 800, Reward=2.966631, Minibatch Loss= 0.0710, Training Accuracy= 0.984, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.617378\n",
      "Step 780, Reward=0.765345, Minibatch Loss= 0.1347, Training Accuracy= 0.961, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.494995\n",
      "Step 810, Reward=4.4216833, Minibatch Loss= 0.2037, Training Accuracy= 0.930, Testing Acc= 0.877880, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.710753\n",
      "Step 790, Reward=-0.2676822, Minibatch Loss= 0.1525, Training Accuracy= 0.953, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.795927\n",
      "Step 820, Reward=4.888496, Minibatch Loss= 0.1523, Training Accuracy= 0.961, Testing Acc= 0.921659, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.604082\n",
      "Step 800, Reward=3.5661006, Minibatch Loss= 0.2425, Training Accuracy= 0.930, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.574376\n",
      "Step 830, Reward=2.606415, Minibatch Loss= 0.1054, Training Accuracy= 0.969, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.543776\n",
      "Step 810, Reward=5.035161, Minibatch Loss= 0.1135, Training Accuracy= 0.969, Testing Acc= 0.921659, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.612329\n",
      "Step 840, Reward=2.536755, Minibatch Loss= 0.1385, Training Accuracy= 0.945, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.587479\n",
      "Step 850, Reward=5.199073, Minibatch Loss= 0.1697, Training Accuracy= 0.922, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.668191\n",
      "Step 860, Reward=5.1525006, Minibatch Loss= 0.2682, Training Accuracy= 0.883, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.599432\n",
      "Step 870, Reward=2.7189598, Minibatch Loss= 0.1161, Training Accuracy= 0.953, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.581304\n",
      "Step 820, Reward=0.04205811, Minibatch Loss= 0.1663, Training Accuracy= 0.938, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.579084\n",
      "Step 880, Reward=5.535446, Minibatch Loss= 0.1370, Training Accuracy= 0.969, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.600567\n",
      "Step 830, Reward=-0.5422331, Minibatch Loss= 0.0844, Training Accuracy= 0.977, Testing Acc= 0.921659, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.540195\n",
      "Step 890, Reward=6.336552, Minibatch Loss= 0.1583, Training Accuracy= 0.914, Testing Acc= 0.923963, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.657269\n",
      "Step 840, Reward=4.4096413, Minibatch Loss= 0.1506, Training Accuracy= 0.938, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.507937\n",
      "Step 900, Reward=2.881538, Minibatch Loss= 0.1223, Training Accuracy= 0.969, Testing Acc= 0.889401, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.758064\n",
      "Step 850, Reward=1.275228, Minibatch Loss= 0.1930, Training Accuracy= 0.938, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.545204\n",
      "Step 910, Reward=3.7848506, Minibatch Loss= 0.1336, Training Accuracy= 0.945, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.791899\n",
      "Step 860, Reward=-0.85886025, Minibatch Loss= 0.2016, Training Accuracy= 0.938, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.556111\n",
      "Step 920, Reward=6.922163, Minibatch Loss= 0.1050, Training Accuracy= 0.961, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.799854\n",
      "Step 870, Reward=1.5148377, Minibatch Loss= 0.2376, Training Accuracy= 0.922, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.607241\n",
      "Step 930, Reward=4.8842926, Minibatch Loss= 0.1548, Training Accuracy= 0.953, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.786307\n",
      "Step 880, Reward=-0.17159784, Minibatch Loss= 0.1786, Training Accuracy= 0.938, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.397389\n",
      "Step 940, Reward=4.084936, Minibatch Loss= 0.0886, Training Accuracy= 0.984, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.649744\n",
      "Step 890, Reward=2.157432, Minibatch Loss= 0.1736, Training Accuracy= 0.930, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.427539\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 950, Reward=5.484989, Minibatch Loss= 0.1726, Training Accuracy= 0.922, Testing Acc= 0.926267, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.677251\n",
      "Step 900, Reward=2.3935828, Minibatch Loss= 0.1151, Training Accuracy= 0.961, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.485903\n",
      "Step 960, Reward=3.9173527, Minibatch Loss= 0.1511, Training Accuracy= 0.945, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.805625\n",
      "Step 910, Reward=2.838383, Minibatch Loss= 0.1673, Training Accuracy= 0.953, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.250942\n",
      "Step 970, Reward=3.8393762, Minibatch Loss= 0.1192, Training Accuracy= 0.961, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.735713\n",
      "Step 920, Reward=6.486602, Minibatch Loss= 0.2363, Training Accuracy= 0.898, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.386221\n",
      "Step 980, Reward=3.7200966, Minibatch Loss= 0.1113, Training Accuracy= 0.953, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.690671\n",
      "Step 930, Reward=2.2924814, Minibatch Loss= 0.1357, Training Accuracy= 0.953, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.519573\n",
      "Step 990, Reward=8.789995, Minibatch Loss= 0.1040, Training Accuracy= 0.977, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.702600\n",
      "Step 940, Reward=5.5790377, Minibatch Loss= 0.1003, Training Accuracy= 0.977, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.575240\n",
      "Step 1000, Reward=7.084664, Minibatch Loss= 0.1367, Training Accuracy= 0.961, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.733019\n",
      "Step 950, Reward=1.8653206, Minibatch Loss= 0.1661, Training Accuracy= 0.953, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.531937\n",
      "Step 1010, Reward=6.204202, Minibatch Loss= 0.1269, Training Accuracy= 0.945, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.904479\n",
      "Step 960, Reward=7.8720675, Minibatch Loss= 0.1801, Training Accuracy= 0.930, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.661465\n",
      "Step 1020, Reward=3.2569141, Minibatch Loss= 0.1520, Training Accuracy= 0.945, Testing Acc= 0.868664, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.966626\n",
      "Step 970, Reward=5.44053, Minibatch Loss= 0.1367, Training Accuracy= 0.961, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.594529\n",
      "Step 1030, Reward=2.2892942, Minibatch Loss= 0.1266, Training Accuracy= 0.953, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.175415\n",
      "Step 980, Reward=5.490271, Minibatch Loss= 0.1798, Training Accuracy= 0.938, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.660519\n",
      "Step 1040, Reward=6.461926, Minibatch Loss= 0.1565, Training Accuracy= 0.945, Testing Acc= 0.923963, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.133102\n",
      "Step 990, Reward=4.125369, Minibatch Loss= 0.1959, Training Accuracy= 0.930, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.776679\n",
      "Step 1050, Reward=6.0665674, Minibatch Loss= 0.1215, Training Accuracy= 0.969, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.943869\n",
      "Step 1000, Reward=7.1869593, Minibatch Loss= 0.1853, Training Accuracy= 0.914, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.690467\n",
      "Step 1060, Reward=7.5444365, Minibatch Loss= 0.2420, Training Accuracy= 0.898, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.921896\n",
      "Step 1010, Reward=5.797489, Minibatch Loss= 0.1299, Training Accuracy= 0.953, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.771454\n",
      "Step 1070, Reward=6.7776227, Minibatch Loss= 0.1189, Training Accuracy= 0.969, Testing Acc= 0.887097, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.877959\n",
      "Step 1020, Reward=4.070493, Minibatch Loss= 0.1089, Training Accuracy= 0.953, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.689283\n",
      "Step 1080, Reward=6.2652073, Minibatch Loss= 0.2306, Training Accuracy= 0.898, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.877581\n",
      "Step 1030, Reward=4.0450597, Minibatch Loss= 0.2308, Training Accuracy= 0.891, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.690282\n",
      "Step 1090, Reward=3.8222702, Minibatch Loss= 0.0590, Training Accuracy= 0.992, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.765765\n",
      "Step 1040, Reward=4.74436, Minibatch Loss= 0.1327, Training Accuracy= 0.945, Testing Acc= 0.921659, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.842021\n",
      "Step 1100, Reward=7.1269937, Minibatch Loss= 0.1357, Training Accuracy= 0.938, Testing Acc= 0.926267, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.913692\n",
      "Step 1050, Reward=2.8474073, Minibatch Loss= 0.1579, Training Accuracy= 0.945, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.685960\n",
      "Step 1110, Reward=3.5290475, Minibatch Loss= 0.1211, Training Accuracy= 0.961, Testing Acc= 0.923963, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.953698\n",
      "Step 1060, Reward=3.5075424, Minibatch Loss= 0.1361, Training Accuracy= 0.930, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.720445\n",
      "Step 1120, Reward=6.5957136, Minibatch Loss= 0.0642, Training Accuracy= 0.977, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.016510\n",
      "Step 1070, Reward=3.8026567, Minibatch Loss= 0.1465, Training Accuracy= 0.930, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.622701\n",
      "Step 1130, Reward=8.539179, Minibatch Loss= 0.0873, Training Accuracy= 0.977, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.890822\n",
      "Step 1080, Reward=3.9174027, Minibatch Loss= 0.1433, Training Accuracy= 0.953, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.585327\n",
      "Step 1140, Reward=7.7314672, Minibatch Loss= 0.1697, Training Accuracy= 0.938, Testing Acc= 0.891705, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.869085\n",
      "Step 1090, Reward=5.2000065, Minibatch Loss= 0.1233, Training Accuracy= 0.953, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.636008\n",
      "Step 1150, Reward=8.250413, Minibatch Loss= 0.1583, Training Accuracy= 0.953, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.007259\n",
      "Step 1100, Reward=2.045374, Minibatch Loss= 0.1302, Training Accuracy= 0.953, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.797464\n",
      "Step 1160, Reward=7.416335, Minibatch Loss= 0.1261, Training Accuracy= 0.953, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.202744\n",
      "Step 1110, Reward=2.0470915, Minibatch Loss= 0.2146, Training Accuracy= 0.906, Testing Acc= 0.868664, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.571741\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1170, Reward=6.200927, Minibatch Loss= 0.1987, Training Accuracy= 0.914, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.900910\n",
      "Step 1120, Reward=4.299074, Minibatch Loss= 0.1630, Training Accuracy= 0.945, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.532848\n",
      "Step 1180, Reward=8.414896, Minibatch Loss= 0.1147, Training Accuracy= 0.977, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.033117\n",
      "Step 1130, Reward=6.8827095, Minibatch Loss= 0.1237, Training Accuracy= 0.945, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.559817\n",
      "Step 1190, Reward=6.553611, Minibatch Loss= 0.1559, Training Accuracy= 0.945, Testing Acc= 0.889401, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.045490\n",
      "Step 1140, Reward=3.352686, Minibatch Loss= 0.1371, Training Accuracy= 0.961, Testing Acc= 0.884793, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.686558\n",
      "Step 1200, Reward=6.435047, Minibatch Loss= 0.1194, Training Accuracy= 0.961, Testing Acc= 0.889401, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.017846\n",
      "Step 1150, Reward=3.9661639, Minibatch Loss= 0.2852, Training Accuracy= 0.883, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.819815\n",
      "Step 1210, Reward=6.522148, Minibatch Loss= 0.0670, Training Accuracy= 0.977, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.953167\n",
      "Step 1160, Reward=3.588269, Minibatch Loss= 0.1672, Training Accuracy= 0.930, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.838321\n",
      "Step 1220, Reward=7.9742966, Minibatch Loss= 0.1233, Training Accuracy= 0.961, Testing Acc= 0.889401, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.263216\n",
      "Step 1170, Reward=2.9108543, Minibatch Loss= 0.1741, Training Accuracy= 0.945, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.768864\n",
      "Step 1230, Reward=7.7941446, Minibatch Loss= 0.1393, Training Accuracy= 0.953, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.040255\n",
      "Step 1180, Reward=4.779329, Minibatch Loss= 0.1187, Training Accuracy= 0.969, Testing Acc= 0.891705, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.650437\n",
      "Step 1240, Reward=9.834394, Minibatch Loss= 0.0832, Training Accuracy= 0.969, Testing Acc= 0.914747, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.042870\n",
      "Step 1190, Reward=6.3403, Minibatch Loss= 0.1995, Training Accuracy= 0.906, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.649211\n",
      "Step 1250, Reward=6.675843, Minibatch Loss= 0.0972, Training Accuracy= 0.961, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.065133\n",
      "Step 1200, Reward=3.5461955, Minibatch Loss= 0.1419, Training Accuracy= 0.938, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.629136\n",
      "Step 1260, Reward=7.6087513, Minibatch Loss= 0.0726, Training Accuracy= 0.977, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.088475\n",
      "Step 1210, Reward=0.7585703, Minibatch Loss= 0.1343, Training Accuracy= 0.953, Testing Acc= 0.921659, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.754027\n",
      "Step 1270, Reward=6.1162214, Minibatch Loss= 0.1604, Training Accuracy= 0.953, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.003831\n",
      "Step 1220, Reward=0.34257543, Minibatch Loss= 0.2360, Training Accuracy= 0.914, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.697771\n",
      "Step 1280, Reward=5.661827, Minibatch Loss= 0.0656, Training Accuracy= 0.977, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.057627\n",
      "Step 1230, Reward=2.5826488, Minibatch Loss= 0.2338, Training Accuracy= 0.906, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.750648\n",
      "Step 1290, Reward=7.6683445, Minibatch Loss= 0.0471, Training Accuracy= 0.984, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.996009\n",
      "Step 1240, Reward=2.0138733, Minibatch Loss= 0.1324, Training Accuracy= 0.953, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.776567\n",
      "Step 1300, Reward=10.519939, Minibatch Loss= 0.2018, Training Accuracy= 0.922, Testing Acc= 0.907834, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.088627\n",
      "Step 1250, Reward=4.6722145, Minibatch Loss= 0.0533, Training Accuracy= 0.992, Testing Acc= 0.900922, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.696272\n",
      "Step 1260, Reward=7.361413, Minibatch Loss= 0.0774, Training Accuracy= 0.977, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.418915\n",
      "Step 1270, Reward=4.369075, Minibatch Loss= 0.0997, Training Accuracy= 0.977, Testing Acc= 0.898618, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.589533\n",
      "Step 1280, Reward=5.385027, Minibatch Loss= 0.2178, Training Accuracy= 0.930, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.781231\n",
      "Step 1290, Reward=2.6645494, Minibatch Loss= 0.1384, Training Accuracy= 0.953, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.723672\n",
      "Step 1310, Reward=6.4449816, Minibatch Loss= 0.1049, Training Accuracy= 0.945, Testing Acc= 0.917051, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.060203\n",
      "Step 1300, Reward=3.757676, Minibatch Loss= 0.1803, Training Accuracy= 0.930, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.858042\n",
      "Step 1320, Reward=8.95886, Minibatch Loss= 0.0962, Training Accuracy= 0.969, Testing Acc= 0.905530, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.974847\n",
      "Step 1310, Reward=7.392368, Minibatch Loss= 0.1399, Training Accuracy= 0.953, Testing Acc= 0.896313, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.747510\n",
      "Step 1330, Reward=5.909351, Minibatch Loss= 0.1870, Training Accuracy= 0.938, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  0.989046\n",
      "Step 1320, Reward=3.1347518, Minibatch Loss= 0.1161, Training Accuracy= 0.977, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.704099\n",
      "Step 1330, Reward=3.5569038, Minibatch Loss= 0.1317, Training Accuracy= 0.961, Testing Acc= 0.903226, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.738493\n",
      "Step 1340, Reward=8.634398, Minibatch Loss= 0.1000, Training Accuracy= 0.969, Testing Acc= 0.882488, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.060460\n",
      "Step 1340, Reward=4.5015125, Minibatch Loss= 0.1735, Training Accuracy= 0.922, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.718759\n",
      "Step 1350, Reward=7.7100496, Minibatch Loss= 0.0951, Training Accuracy= 0.961, Testing Acc= 0.910138, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.131455\n",
      "Step 1350, Reward=7.3970857, Minibatch Loss= 0.1594, Training Accuracy= 0.953, Testing Acc= 0.894009, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.693969\n",
      "Step 1360, Reward=8.999207, Minibatch Loss= 0.2113, Training Accuracy= 0.906, Testing Acc= 0.912442, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.071291\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1710, Reward=9.625984, Minibatch Loss= 0.1222, Training Accuracy= 0.953, Testing Acc= 0.884793, Max Final Accuracy=  0.926267, Max AUC=  0.963622, Max AP=  0.956391, Max Q=  1.232728\n",
      "Step 1710, Reward=4.7719383, Minibatch Loss= 0.0716, Training Accuracy= 0.984, Testing Acc= 0.880184, Max Final Accuracy=  0.926267, Max AUC=  0.969228, Max AP=  0.965020, Max Q=  0.532929\n",
      "Step 330, Reward=-4.0212555, Minibatch Loss= 0.2390, Training Accuracy= 0.922, Testing Acc= 0.903226, Max Final Accuracy=  0.914747, Max AUC=  0.964854, Max AP=  0.961440, Max Q=  -0.046030\n",
      "Step 730, Reward=1.9183358, Minibatch Loss= 0.1714, Training Accuracy= 0.938, Testing Acc= 0.921659, Max Final Accuracy=  0.923963, Max AUC=  0.962815, Max AP=  0.957731, Max Q=  0.690932\n"
     ]
    }
   ],
   "source": [
    "# Hyper-parameter grid for the parallel training sweep.\n",
    "# Alternative actor/critic value lists, currently disabled:\n",
    "# ACTORs = [0.0001, 0.0005, 0.00005]\n",
    "# CRITICs = [0.0005, 0.0001]\n",
    "LRs = [0.0005, 0.0007, 0.001]\n",
    "DSs = [500, 250]\n",
    "DRs = [.95, .85]\n",
    "ACTORs = [0.001, 0.0005, 0.00005]\n",
    "CRITICs = [0.0005]\n",
    "\n",
    "# One (lr, ds, dr, a_lr, c_lr) tuple per grid point; pool.map hands each\n",
    "# tuple to `train` as its single positional argument.\n",
    "param_grid = [(lr, ds, dr, a_lr, c_lr)\n",
    "              for lr in LRs\n",
    "              for ds in DSs\n",
    "              for dr in DRs\n",
    "              for a_lr in ACTORs\n",
    "              for c_lr in CRITICs]\n",
    "\n",
    "# Two worker processes run the grid points concurrently.\n",
    "pool = mp.Pool(2)\n",
    "try:\n",
    "    pool.map(train, param_grid)\n",
    "    pool.close()\n",
    "except BaseException:\n",
    "    # A failed or interrupted run must not leave orphaned workers behind:\n",
    "    # stop them immediately, then re-raise the original error.\n",
    "    pool.terminate()\n",
    "    raise\n",
    "finally:\n",
    "    # join() is valid after either close() or terminate() and reaps the workers.\n",
    "    pool.join()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
