{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(\"../\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "from matplotlib import pyplot as plt\n",
    "from tensorflow.examples.tutorials.mnist import input_data\n",
    "import os \n",
    "import multiprocessing as mp\n",
    "from sklearn.metrics import roc_auc_score, average_precision_score, mean_squared_error\n",
    "import multiprocessing as mp\n",
    "from qnetwork import *\n",
    "from utils import *\n",
    "import pandas as pd\n",
    "# Short alias for the TF-Slim layers API used when the network is built.\n",
    "slim = tf.contrib.slim\n",
    "# Expose only GPU 0 to this process.\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
    "# One seed shared by NumPy, TensorFlow and the stdlib RNG (also passed to the replay buffer later).\n",
    "SEED = 2599\n",
    "np.random.seed(SEED)\n",
    "tf.set_random_seed(SEED)\n",
    "import random\n",
    "random.seed(SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the four feature matrices (normal/abnormal x train/test), in this fixed order.\n",
    "_SPLIT_FILES = (\n",
    "    \"../data/normal_train_all_35_missing_by_modality.txt\",\n",
    "    \"../data/abnormal_train_all_35_missing_by_modality.txt\",\n",
    "    \"../data/normal_test_all_35_missing_by_modality.txt\",\n",
    "    \"../data/abnormal_test_all_35_missing_by_modality.txt\",\n",
    ")\n",
    "normal_train, abnormal_train, normal_test, abnormal_test = map(np.loadtxt, _SPLIT_FILES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack 10 copies of each training split, then trim the normal split so it is\n",
    "# no longer than the abnormal one.\n",
    "# NOTE: this cell rebinds its own inputs, so re-running it without reloading the\n",
    "# data multiplies the row count again.\n",
    "n_copies = 10\n",
    "normal_train = np.tile(normal_train, (n_copies, 1))\n",
    "abnormal_train = np.tile(abnormal_train, (n_copies, 1))\n",
    "normal_train = normal_train[:len(abnormal_train)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _stack_split(normal, abnormal):\n",
    "    \"\"\"Stack normal rows above abnormal rows.\n",
    "\n",
    "    Returns a tuple ``(data, labels, nan_mask)``: float32 features, int32 labels\n",
    "    (0 for every normal row, 1 for every abnormal row) and a float32 mask that is\n",
    "    1.0 wherever the feature value is NaN.\n",
    "    \"\"\"\n",
    "    data = np.vstack([normal, abnormal]).astype(np.float32)\n",
    "    labels = np.concatenate([np.zeros(len(normal)), np.ones(len(abnormal))]).astype(np.int32)\n",
    "    nan_mask = np.isnan(data).astype(np.float32)\n",
    "    return data, labels, nan_mask\n",
    "\n",
    "\n",
    "data_train, data_label_train, data_mask_train = _stack_split(normal_train, abnormal_train)\n",
    "data_test, data_label_test, data_mask_test = _stack_split(normal_test, abnormal_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Value written over NaN entries of the feature matrices in the next cell.\n",
    "nan_replacement = 0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Overwrite NaN entries in place; the NaN positions were already recorded in the\n",
    "# data_mask_* arrays before this point.\n",
    "np.putmask(data_train, np.isnan(data_train), nan_replacement)\n",
    "np.putmask(data_test, np.isnan(data_test), nan_replacement)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([1.96707608, 1.50105805, 0.89303454, 0.44501721, 0.19950772,\n",
       "        0.21050814, 0.43001663, 0.91603543, 1.54655981, 1.89157316]),\n",
       " array([2.06842961e-05, 1.00016817e-01, 2.00012949e-01, 3.00009082e-01,\n",
       "        4.00005214e-01, 5.00001347e-01, 5.99997479e-01, 6.99993612e-01,\n",
       "        7.99989744e-01, 8.99985877e-01, 9.99982009e-01]),\n",
       " <a list of 10 Patch objects>)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAD8CAYAAABw1c+bAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEupJREFUeJzt3X+MXWd95/H3p04CasmCwUMXxTYTtG5FoJDQkaHKagkqGBO2cauyW1ulBBTWEiXt9ocqmVZKqkSV0qLdSqhpg2mtlNWS0NLSThtDcPnRtKVmPYE0kNAU46ZkZCRPcZqWhpI6fPePeyJdxjOeMzN35mb8vF/Slc95nuec+31s6zNnzj33nFQVkqR2fMe4C5AkrS+DX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktSYC8ZdwEK2bNlSk5OT4y5DkjaMe++99x+raqLP2Kdl8E9OTjIzMzPuMiRpw0jyD33HeqpHkhpj8EtSY5YM/iTbknwyyReTPJDkfy4wJknek+R4kvuTvGKo79okX+pe1456ApKk5elzjv8M8PNV9dkkFwP3JjlSVQ8OjXkDsKN7vRL4LeCVSZ4L3AhMAdVtO11Vj450FpKk3pY84q+qr1bVZ7vlfwG+CFwyb9ge4P01cBR4TpIXAK8HjlTV6S7sjwC7RzoDSdKyLOscf5JJ4ArgM/O6LgEeGVqf7doWa19o3/uTzCSZmZubW05ZkqRl6B38SZ4F/AHwM1X1z/O7F9ikztF+dmPVwaqaqqqpiYlel6JKklagV/AnuZBB6P/fqvrDBYbMAtuG1rcCJ8/RLkkakz5X9QT4HeCLVfW/Fxk2Dbylu7rnVcBjVfVV4G5gV5LNSTYDu7o2SdKY9Lmq50rgJ4DPJ7mva/tFYDtAVd0GHAauBo4DjwNv6/pOJ7kZONZtd1NVnR5d+WebPHDXWu5+UQ/f8saxvK8kLdeSwV9Vf8nC5+qHxxTwzkX6DgGHVlSdJGnk/OauJDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaszT8glckjRO5/v3gTzil6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktSYJW/ZkOQQ8F+BU1X10gX6fwH48aH9vRiY6B67+DDwL8CTwJmqmhpV4ZKklelzxH87sHuxzqp6d1VdXlWXA+8C/nzec3Vf0/Ub+pL0NLBk8FfVPUDfB6TvA+5YVUWSpDU1snP8Sb6TwW8GfzDUXMDHktybZP+o3kuStHKjvC3zDwF/Ne80z5VVdTLJ84EjSf62+w3iLN0Phv0A27dvH2FZkqRho7yqZy/zTvNU1cnuz1PAh4Gdi21cVQeraqqqpiYmJkZYliRp2EiCP8mzgVcDfzzU9l1JLn5qGdgFfGEU7ydJWrk+l3PeAVwFbEkyC9wIXAhQVbd1w34E+FhV/evQpt8NfDjJU+/zgar66OhKlyStxJLBX1X7eoy5ncFln8NtJ4CXr7QwSdLa8Ju7ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGjPJh65I0MpMH7hp3CeetJY/4kxxKcirJgs/LTXJVkseS3Ne9bhjq253koSTHkxwYZeGSpJXpc6rndmD3EmP+oqou7143ASTZBNwKvAG4DNiX5LLVFCtJWr0lg7+q7gFOr2DfO4HjVXWiqp4A7gT2rGA/kqQRGtWHuz+Q5G+SfCTJS7q2S4BHhsbMdm2SpDEaxYe7nwVeWFVfT3I18EfADiALjK3FdpJkP7AfYPv27SMoa32N84Ooh29549jeW9LGs+oj/qr656r6erd8GLgwyRYGR/jbhoZuBU6eYz8Hq2qqqqYmJiZWW5YkaRGrDv4k/zFJuuWd3T6/BhwDdiS5NMlF
wF5gerXvJ0lanSVP9SS5A7gK2JJkFrgRuBCgqm4D3gS8I8kZ4BvA3qoq4EyS64G7gU3Aoap6YE1mIUnqbcngr6p9S/T/BvAbi/QdBg6vrDRJ0lrwlg2S1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhqzZPAnOZTkVJIvLNL/40nu716fTvLyob6Hk3w+yX1JZkZZuCRpZfoc8d8O7D5H/98Dr66qlwE3Awfn9b+mqi6vqqmVlShJGqU+D1u/J8nkOfo/PbR6FNi6+rIkSWtl1Of4rwM+MrRewMeS3Jtk/7k2TLI/yUySmbm5uRGXJUl6ypJH/H0leQ2D4P/PQ81XVtXJJM8HjiT526q6Z6Htq+og3WmiqampGlVdkqRvN5Ij/iQvA34b2FNVX3uqvapOdn+eAj4M7BzF+0mSVm7VwZ9kO/CHwE9U1d8NtX9XkoufWgZ2AQteGSRJWj9LnupJcgdwFbAlySxwI3AhQFXdBtwAPA/4zSQAZ7oreL4b+HDXdgHwgar66BrMQZK0DH2u6tm3RP/bgbcv0H4CePnZW0iSxslv7kpSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjegV/kkNJTiVZ8Jm5GXhPkuNJ7k/yiqG+a5N8qXtdO6rCJUkr0/eI/3Zg9zn63wDs6F77gd8CSPJcBs/ofSWwE7gxyeaVFitJWr1ewV9V9wCnzzFkD/D+GjgKPCfJC4DXA0eq6nRVPQoc4dw/QCRJa2xU5/gvAR4ZWp/t2hZrlySNyaiCPwu01Tnaz95Bsj/JTJKZubm5EZUlSZpvVME/C2wbWt8KnDxH+1mq6mBVTVXV1MTExIjKkiTNN6rgnwbe0l3d8yrgsar6KnA3sCvJ5u5D3V1dmyRpTC7oMyjJHcBVwJYkswyu1LkQoKpuAw4DVwPHgceBt3V9p5PcDBzrdnVTVZ3rQ2JJ0hrrFfxVtW+J/gLeuUjfIeDQ8kuTJK0Fv7krSY0x+CWpMQa/JDWm1zl+Se2aPHDXuEvQiHnEL0mNMfglqTGe6jkPjOtX8YdveeNY3lfS6njEL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxvYI/ye4kDyU5nuTAAv2/nuS+7vV3Sf5pqO/Job7pURYvSVq+Je/Vk2QTcCvwOmAWOJZkuqoefGpMVf3s0PifAq4Y2sU3qury0ZUsSVqNPkf8O4HjVXWiqp4A7gT2nGP8PuCOURQnSRq9PsF/CfDI0Pps13aWJC8ELgU+MdT8zCQzSY4m+eEVVypJGok+t2XOAm21yNi9wIeq6smhtu1VdTLJi4BPJPl8VX35rDdJ9gP7AbZv396jLEnSSvQ54p8Ftg2tbwVOLjJ2L/NO81TVye7PE8Cn+Pbz/8PjDlbVVFVNTUxM9ChLkrQSfYL/GLAjyaVJLmIQ7mddnZPke4HNwF8PtW1O8oxueQtwJfDg/G0lSetnyVM9VXUmyfXA3cAm4FBVPZDkJmCmqp76IbAPuLOqhk8DvRh4b5JvMfghc8vw1UCSpPXX69GLVXUYODyv7YZ567+8wHafBr5vFfVJkkbMb+5KUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWpMr+BPsjvJQ0mOJzmwQP9bk8wlua97vX2o79okX+pe146yeEnS8i35zN0km4BbgdcBs8CxJNMLPDT9g1V1/bxtnwvcCEwBBdzbbfvoSKqXJC1bnyP+ncDxqjpRVU8AdwJ7eu7/9cCRqjrdhf0RYPfKSpUkjUKf4L8E
eGRofbZrm+9Hk9yf5ENJti1zW5LsTzKTZGZubq5HWZKklegT/Fmgreat/wkwWVUvA/4M+N1lbDtorDpYVVNVNTUxMdGjLEnSSvQJ/llg29D6VuDk8ICq+lpVfbNbfR/w/X23lSStrz7BfwzYkeTSJBcBe4Hp4QFJXjC0eg3wxW75bmBXks1JNgO7ujZJ0pgseVVPVZ1Jcj2DwN4EHKqqB5LcBMxU1TTw00muAc4Ap4G3dtueTnIzgx8eADdV1ek1mIckqaclgx+gqg4Dh+e13TC0/C7gXYtsewg4tIoaJUkj1Cv4pYVMHrhrbO/98C1vHNt7Sxudt2yQpMZ4xC9tAOP87UrnH4/4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjegV/kt1JHkpyPMmBBfp/LsmDSe5P8vEkLxzqezLJfd1rev62kqT1teT9+JNsAm4FXgfMAseSTFfVg0PDPgdMVdXjSd4B/BrwY13fN6rq8hHXLUlaoT5H/DuB41V1oqqeAO4E9gwPqKpPVtXj3epRYOtoy5QkjUqf4L8EeGRofbZrW8x1wEeG1p+ZZCbJ0SQ/vNhGSfZ342bm5uZ6lCVJWok+j17MAm214MDkzcAU8Oqh5u1VdTLJi4BPJPl8VX35rB1WHQQOAkxNTS24f0nS6vU54p8Ftg2tbwVOzh+U5LXALwHXVNU3n2qvqpPdnyeATwFXrKJeSdIq9Qn+Y8COJJcmuQjYC3zb1TlJrgDeyyD0Tw21b07yjG55C3AlMPyhsCRpnS15qqeqziS5Hrgb2AQcqqoHktwEzFTVNPBu4FnA7ycB+EpVXQO8GHhvkm8x+CFzy7yrgSRJ66zPOX6q6jBweF7bDUPLr11ku08D37eaAqWnk8kDd427BGnVegW/9HRjAEsr5y0bJKkxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mN6RX8SXYneSjJ8SQHFuh/RpIPdv2fSTI51Peurv2hJK8fXemSpJVYMviTbAJuBd4AXAbsS3LZvGHXAY9W1X8Cfh341W7byxg8nP0lwG7gN7v9SZLGpM8R/07geFWdqKongDuBPfPG7AF+t1v+EPCDGTx1fQ9wZ1V9s6r+Hjje7U+SNCZ9gv8S4JGh9dmubcExVXUGeAx4Xs9tJUnrqM/D1rNAW/Uc02fbwQ6S/cD+bvXrSR7qUdtCtgD/uMJtNyrnfP5rbb7Q4Jzzq6ua8wv7DuwT/LPAtqH1rcDJRcbMJrkAeDZwuue2AFTVQeBgv7IXl2SmqqZWu5+NxDmf/1qbLzjntdTnVM8xYEeSS5NcxODD2ul5Y6aBa7vlNwGfqKrq2vd2V/1cCuwA/t9oSpckrcSSR/xVdSbJ9cDdwCbgUFU9kOQmYKaqpoHfAf5PkuMMjvT3dts+kOT3gAeBM8A7q+rJNZqLJKmHPqd6qKrDwOF5bTcMLf8b8N8W2fZXgF9ZRY3LterTRRuQcz7/tTZfcM5rJoMzMpKkVnjLBklqzIYN/tXcRmIj6jHfn0vyYJL7k3w8Se9Lu56ulprz0Lg3JakkG/4KkD5zTvLfu3/rB5J8YL1rHLUe/7e3J/lkks91/7+vHkedo5LkUJJTSb6wSH+SvKf7+7g/yStGXkRVbbgXgw+Zvwy8CLgI+BvgsnljfhK4rVveC3xw3HWv8XxfA3xnt/yOjTzfvnPuxl0M3AMcBabGXfc6/DvvAD4HbO7Wnz/uutdhzgeBd3TLlwEPj7vuVc75vwCvAL6wSP/VwEcYfA/qVcBnRl3DRj3iX81tJDaiJedbVZ+sqse71aMMvjOxkfX5Nwa4Gfg14N/Ws7g10mfO/wO4taoeBaiqU+tc46j1mXMB/6FbfjaLfBdoo6iqexhc/biYPcD7a+Ao8JwkLxhl
DRs1+FdzG4mNaLm3vriOwRHDRrbknJNcAWyrqj9dz8LWUJ9/5+8BvifJXyU5mmT3ulW3NvrM+ZeBNyeZZXB14U+tT2ljs+a3uul1OefT0GpuI7ERLefWF28GpoBXr2lFa++cc07yHQzuBPvW9SpoHfT5d76Awemeqxj8VvcXSV5aVf+0xrWtlT5z3gfcXlX/K8kPMPjO0Eur6ltrX95YrHl2bdQj/uXcRoJ5t5HYiHrd+iLJa4FfAq6pqm+uU21rZak5Xwy8FPhUkocZnAud3uAf8Pb9f/3HVfXvNbjj7UMMfhBsVH3mfB3wewBV9dfAMxncx+d81ftWNyu1UYN/NbeR2IiWnG932uO9DEJ/o5/3hSXmXFWPVdWWqpqsqkkGn2tcU1Uz4yl3JPr8v/4jBh/kk2QLg1M/J9a1ytHqM+evAD8IkOTFDIJ/bl2rXF/TwFu6q3teBTxWVV8d5RtsyFM9tYrbSGxEPef7buBZwO93n2F/paquGVvRq9RzzueVnnO+G9iV5EHgSeAXqupr46t6dXrO+eeB9yX5WQanPN66gQ/iSHIHg1N1W7rPLW4ELgSoqtsYfI5xNYPnlzwOvG3kNWzgvz9J0gps1FM9kqQVMvglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWrM/weW0s/QV9+kXgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import scipy.stats as stats\n",
    "\n",
    "# Two normal distributions (sigma = 0.2) truncated to [0, 1]: one centred on 0,\n",
    "# one centred on 1. train() samples its random exploration actions from them.\n",
    "lower, upper = 0, 1\n",
    "mu, sigma = 0, 0.2\n",
    "left_truncnorm = stats.truncnorm(\n",
    "    (lower - mu) / sigma, (upper - mu) / sigma, loc=mu, scale=sigma)\n",
    "right_truncnorm = stats.truncnorm(\n",
    "    (lower - 1.) / sigma, (upper - 1.) / sigma, loc=1., scale=sigma)\n",
    "\n",
    "# Sanity check: histogram of an equal mix of both samplers.\n",
    "# `density=True` replaces the deprecated `normed=True` keyword of Axes.hist.\n",
    "fig, ax = plt.subplots()\n",
    "ax.hist(np.concatenate([left_truncnorm.rvs(10000), right_truncnorm.rvs(10000)]), density=True)\n",
    "ax.set(xlabel=\"sampled value\", ylabel=\"density\", title=\"Mixture of the left and right truncated normals\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train(args):\n",
    "    \"\"\"Train the classifier while an actor-critic agent re-weights its gradients.\n",
    "\n",
    "    For every mini-batch an action ``a`` of shape ``[batch_size, num_input]`` is\n",
    "    chosen (random exploration, the observed-feature mask as a guide, or the\n",
    "    actor's prediction) and multiplied into the per-sample gradient of the first\n",
    "    entry of ``fc_variables`` before the batch-averaged gradients are applied.\n",
    "    The reward is the negative training KL term plus ten times a contrastive\n",
    "    term computed on the ``fc1`` features.\n",
    "\n",
    "    Args:\n",
    "        args: 5-tuple ``(start_learning_rate, decay_step, decay_rate, rl_actor,\n",
    "            rl_critic)``: initial learning rate of the classifier, step period\n",
    "            and factor of its exponential decay, and the learning rates handed\n",
    "            to ``Actor`` and ``Critic``.\n",
    "\n",
    "    Returns:\n",
    "        None. Checkpoints are written under ``./saved_model/<run name>/``,\n",
    "        progress lines are appended to ``./stats/rl_log/<run name>.txt`` and one\n",
    "        summary row is appended to the shared results file under ``./stats/``.\n",
    "\n",
    "    Notes:\n",
    "        Reads the notebook globals ``data_train``, ``data_mask_train``,\n",
    "        ``normal_train``, ``data_test``, ``data_label_test``, ``data_mask_test``,\n",
    "        ``left_truncnorm``, ``right_truncnorm`` and ``SEED``.\n",
    "        If ``saved_model/<run name>`` already exists the run is resumed from the\n",
    "        saved checkpoints and from the last line of the progress log.\n",
    "    \"\"\"\n",
    "    start_learning_rate, decay_step, decay_rate, rl_actor, rl_critic = args\n",
    "\n",
    "    weights = [1000, 1000]  # widths of the hidden layers fc1 and fc2\n",
    "\n",
    "    rl_reward_thres_for_decay = 0\n",
    "\n",
    "    # The exploration / guide probabilities shrink by `rate_decay` per step until\n",
    "    # they reach `rate_floor`. The same constants rebuild them when resuming.\n",
    "    rate_decay = 0.999\n",
    "    rate_floor = 0.1\n",
    "\n",
    "    session_config = tf.ConfigProto(log_device_placement=False)\n",
    "    session_config.gpu_options.allow_growth = True\n",
    "\n",
    "    training_steps = 3000\n",
    "    batch_size = 128\n",
    "    half_batch = batch_size // 2  # every batch is half normal, half abnormal rows\n",
    "    quarter_batch = batch_size // 4\n",
    "\n",
    "    # Network parameters\n",
    "    num_input = 4101  # feature columns per sample\n",
    "    timesteps = 1\n",
    "    num_classes = 2  # normal / abnormal\n",
    "\n",
    "    display_step = 10\n",
    "\n",
    "    graph = tf.Graph()\n",
    "\n",
    "    file_appendix = \"Contrastive_CFP_OCT_MissingData_35_missing_by_modality_more_demo_missing_FCRL_maskGradients_\" + str(start_learning_rate) + \"_\" + str(decay_step) + \"_\" + str(decay_rate) + \"_\" + str(weights[0]) + \"_\" + str(weights[1]) + \"_\" + str(rl_actor) + \"_\" + str(rl_critic)\n",
    "    checkpoint_dir = os.path.join(\"saved_model\", file_appendix)\n",
    "\n",
    "    def build_net(x, is_training=True, reuse=tf.AUTO_REUSE, graph=graph):\n",
    "        \"\"\"Build (or reuse) the classifier; returns (logits, softmax output, fc1 output).\"\"\"\n",
    "        with graph.as_default():\n",
    "\n",
    "            with tf.variable_scope(\"NN\", reuse=tf.AUTO_REUSE) as scope:\n",
    "                with slim.arg_scope([slim.fully_connected], \n",
    "                                        activation_fn=tf.nn.relu,\n",
    "                                        weights_initializer=tf.random_uniform_initializer(0.001, 0.01),\n",
    "                                        weights_regularizer=slim.l2_regularizer(0.1),\n",
    "                                        biases_regularizer=slim.l2_regularizer(0.1),\n",
    "                                        normalizer_fn = slim.batch_norm,\n",
    "                                        normalizer_params = {\"is_training\": is_training},\n",
    "                                        reuse = reuse,\n",
    "                                        scope = scope):\n",
    "\n",
    "                    fc1 = slim.fully_connected(x, weights[0], scope='fc1')\n",
    "                    fc2 = slim.fully_connected(fc1, weights[1], scope='fc2')\n",
    "                    logits = slim.fully_connected(fc2,num_classes,activation_fn=None, weights_regularizer=None, normalizer_fn=None, scope='logits')\n",
    "                    pred = slim.softmax(logits, scope='pred')\n",
    "\n",
    "                    return logits, pred, fc1\n",
    "\n",
    "    def gen_train():\n",
    "        \"\"\"Yield (data, one-hot labels, NaN mask) batches: normal rows first, abnormal rows second.\"\"\"\n",
    "        n_normal = len(normal_train)\n",
    "        labels = np.vstack([np.tile([1., 0.], (half_batch, 1)), np.tile([0., 1.], (half_batch, 1))])\n",
    "        for i in range(data_train.shape[0] // batch_size):\n",
    "            normal_rows = slice(i * half_batch, (i + 1) * half_batch)\n",
    "            abnormal_rows = slice(n_normal + i * half_batch, n_normal + (i + 1) * half_batch)\n",
    "            yield (np.vstack([data_train[normal_rows], data_train[abnormal_rows]]),\n",
    "                   labels,\n",
    "                   np.vstack([data_mask_train[normal_rows], data_mask_train[abnormal_rows]]))\n",
    "\n",
    "    def gen_test():\n",
    "        \"\"\"Yield the test samples one at a time as (data, one-hot label, NaN mask).\"\"\"\n",
    "        for i in range(data_test.shape[0]):\n",
    "            label = np.zeros(2)\n",
    "            label[data_label_test[i]] = 1.\n",
    "            yield data_test[i], label, data_mask_test[i]\n",
    "\n",
    "    with graph.as_default():\n",
    "\n",
    "        dataset_train = tf.data.Dataset.from_generator(gen_train, (tf.float32, tf.float32, tf.int32), ([batch_size, num_input],[batch_size, num_classes],[batch_size, num_input])).repeat(10).shuffle(50)\n",
    "        input_train, label_train, mask_train = dataset_train.make_one_shot_iterator().get_next()\n",
    "\n",
    "        # Every element of the test iterator is the complete test set, in file order.\n",
    "        dataset_test = tf.data.Dataset.from_generator(gen_test, (tf.float32, tf.float32, tf.int32), ([num_input],[num_classes],[num_input])).repeat(30000).batch(data_test.shape[0])\n",
    "        input_test, label_test, _ = dataset_test.make_one_shot_iterator().get_next()\n",
    "\n",
    "        input_train_holder = tf.placeholder(shape=[batch_size, num_input*timesteps], dtype=tf.float32)\n",
    "        label_train_holder = tf.placeholder(shape=[batch_size, num_classes], dtype=tf.float32)\n",
    "        logits, prediction, feature = build_net(input_train_holder)\n",
    "\n",
    "        _, pred_final, _ = build_net(input_test, is_training=False)\n",
    "\n",
    "        fc_variables = [v for v in tf.trainable_variables() if v.name.find(\"NN\")!=-1]\n",
    "\n",
    "        # Per-sample loss (cross entropy + mean regularisation term) and its batch mean.\n",
    "        loss_op = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=label_train_holder) + tf.reduce_mean(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES, scope=\"NN\"))\n",
    "        loss_mean = tf.reduce_mean(loss_op, axis=0)\n",
    "        global_step = tf.train.get_or_create_global_step()\n",
    "        learning_rate = tf.train.exponential_decay(start_learning_rate, global_step, decay_steps=decay_step, decay_rate=decay_rate)\n",
    "        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\n",
    "\n",
    "        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)\n",
    "\n",
    "        # Per-sample gradients, so that each sample can get its own attention map.\n",
    "        grads = tf.vectorized_map(lambda x: optimizer.compute_gradients(x, fc_variables), loss_op)\n",
    "        grads = [g[0] for g in grads]\n",
    "\n",
    "        grad_attention = tf.placeholder(shape=[batch_size, num_input*timesteps], dtype=tf.float32)\n",
    "        # Scale the gradient of the first variable by the per-sample, per-input attention.\n",
    "        grads[0] = grads[0]*grad_attention[...,tf.newaxis]\n",
    "\n",
    "        grads = [tf.reduce_mean(g,axis=0) for g in grads]\n",
    "\n",
    "        with tf.control_dependencies(update_ops):\n",
    "            # Passing global_step makes every update advance it; without it the\n",
    "            # exponential decay above would stay at its initial value forever.\n",
    "            grads_update_op = optimizer.apply_gradients(zip(grads, fc_variables), global_step=global_step)\n",
    "\n",
    "        train_correct_pred = tf.equal(tf.cast(tf.argmax(prediction, 1),tf.float32), tf.cast(tf.argmax(label_train_holder, 1),tf.float32) )\n",
    "        train_accuracy = tf.reduce_mean(tf.cast(train_correct_pred, tf.float32))\n",
    "        # NOTE(review): Keras losses are called as (y_true, y_pred); here the\n",
    "        # prediction is passed first. Kept as is because the reward depends on it.\n",
    "        train_kld = tf.keras.losses.KLDivergence()(prediction, label_train_holder)\n",
    "\n",
    "        final_correct_pred = tf.equal(tf.argmax(pred_final, 1), tf.argmax(label_test, 1))\n",
    "        final_accuracy = tf.reduce_mean(tf.cast(final_correct_pred, tf.float32))\n",
    "\n",
    "        # Best test accuracy so far; stored as a variable so it is checkpointed.\n",
    "        max_final_acc = tf.Variable(0, dtype=tf.float32, name=\"max_final_acc\", trainable=False)\n",
    "        assign_max_final_acc = max_final_acc.assign(final_accuracy)\n",
    "\n",
    "        final_score = pred_final[:,1]\n",
    "\n",
    "    with graph.as_default():\n",
    "        actor = Actor(graph=graph, state_dim=num_input*timesteps*2+weights[0]+num_classes, action_dim=num_input*timesteps, learning_rate=rl_actor, tau=0.001, batch_size=batch_size, save_path=\"./saved_model/\"+file_appendix+\"/actor.ckpt\")\n",
    "        critic = Critic(graph=graph, state_dim=num_input*timesteps*2+weights[0]+num_classes, action_dim=num_input*timesteps, learning_rate=rl_critic, tau=0.001, gamma=0.99, save_path=\"./saved_model/\"+file_appendix+\"/critic.ckpt\")\n",
    "        init = tf.global_variables_initializer()\n",
    "        saver = tf.train.Saver()\n",
    "\n",
    "    # Start training\n",
    "    with tf.Session(config=session_config, graph=graph) as sess:\n",
    "        if os.path.exists(checkpoint_dir):\n",
    "            # Resume: restore all networks and rebuild the schedule state.\n",
    "            actor.saver.restore(sess, os.path.join(checkpoint_dir, \"actor.ckpt\"))\n",
    "            critic.saver.restore(sess, os.path.join(checkpoint_dir, \"critic.ckpt\"))\n",
    "            saver.restore(sess, os.path.join(checkpoint_dir, \"best.ckpt\"))\n",
    "            log = pd.read_csv(os.path.join(\"stats\",\"rl_log\",file_appendix+\".txt\"), delimiter=\",\", header=None)\n",
    "            steps_run = int(log.iloc[-1][0].split(\" \")[1])\n",
    "            EXPLORATION_RATE = max(0.4*(rate_decay**steps_run), rate_floor)\n",
    "            GUIDE_RATE = max(.5*(rate_decay**steps_run), rate_floor)\n",
    "            # The restored weights are the best checkpoint, so start the running\n",
    "            # maxima from it. Starting from 0 would let the first evaluation\n",
    "            # overwrite best.ckpt with a possibly worse model.\n",
    "            max_acc, best_scores = sess.run([max_final_acc, final_score])\n",
    "            max_auc = roc_auc_score(data_label_test, best_scores)\n",
    "            max_ap = average_precision_score(data_label_test, best_scores)\n",
    "        else:\n",
    "            sess.run(init)\n",
    "            steps_run = 0\n",
    "            EXPLORATION_RATE = 0.4\n",
    "            GUIDE_RATE = .5\n",
    "            max_acc = 0.\n",
    "            max_auc = 0.\n",
    "            max_ap = 0.\n",
    "\n",
    "        feed_keys = (input_train_holder, label_train_holder)\n",
    "\n",
    "        # State = [inputs, NaN mask, logits, fc1 features] of the current batch.\n",
    "        data_in, label_in, s_mask = sess.run([input_train, label_train, mask_train])\n",
    "        s_1, s_2 = sess.run([logits, feature], feed_dict=dict(zip(feed_keys, (data_in, label_in))))\n",
    "        s = np.hstack([data_in,s_mask,s_1,s_2])\n",
    "\n",
    "        reward_list = []\n",
    "        replay_buffer = ReplayBuffer(10**4, random_seed=SEED)\n",
    "\n",
    "        actor.update_target_network(sess)\n",
    "        critic.update_target_network(sess)\n",
    "\n",
    "        for step in range(steps_run, training_steps):\n",
    "            rand_num = np.random.rand(1)\n",
    "\n",
    "            if rand_num <= EXPLORATION_RATE:\n",
    "                # Random exploration: shuffled draws from the truncated normals.\n",
    "                a = np.concatenate([left_truncnorm.rvs(num_input*(timesteps//2)*batch_size),right_truncnorm.rvs(num_input*(timesteps//2+1)*batch_size)])\n",
    "                np.random.shuffle(a)\n",
    "                a = a.reshape(batch_size,-1).astype(np.float32)\n",
    "\n",
    "            elif rand_num <= GUIDE_RATE+EXPLORATION_RATE:\n",
    "                # Guided action: attend to exactly the observed (non-NaN) inputs.\n",
    "                a = (1-s_mask).astype(np.float32)\n",
    "\n",
    "            else:\n",
    "                a = actor.predict(s, sess)\n",
    "\n",
    "            features, _, kld = sess.run([feature, grads_update_op, train_kld], feed_dict={grad_attention:a, input_train_holder:data_in, label_train_holder:label_in})\n",
    "\n",
    "            # Next state comes from the next training batch.\n",
    "            data_in, label_in, s2_mask = sess.run([input_train, label_train, mask_train])\n",
    "            s2_1, s2_2 = sess.run([logits, feature], feed_dict=dict(zip(feed_keys, (data_in, label_in))))\n",
    "            s2 = np.hstack([data_in,s2_mask,s2_1,s2_2])\n",
    "\n",
    "            # Contrastive term: between-class feature distance minus within-class\n",
    "            # distance, using the two quarters of each class half of the batch.\n",
    "            normal_a = features[:quarter_batch, :]\n",
    "            normal_b = features[quarter_batch:half_batch, :]\n",
    "            abnormal_a = features[half_batch:half_batch+quarter_batch, :]\n",
    "            abnormal_b = features[half_batch+quarter_batch:, :]\n",
    "            r = np.repeat(-kld, batch_size)\n",
    "            r_mse = mean_squared_error(normal_a, abnormal_a) + \\\n",
    "                        mean_squared_error(normal_b, abnormal_b) - \\\n",
    "                        mean_squared_error(normal_a, normal_b) - \\\n",
    "                        mean_squared_error(abnormal_a, abnormal_b)\n",
    "            r += 10*r_mse\n",
    "            replay_buffer.add_batch([list(i) for i in zip(s,a,r,s2)])\n",
    "\n",
    "            if replay_buffer.size() > batch_size:\n",
    "                s_batch, a_batch, r_batch, s2_batch = replay_buffer.sample_batch(batch_size)\n",
    "\n",
    "                # Calculate targets\n",
    "                target_q = critic.predict_target(\n",
    "                    s2_batch, actor.predict_target(s2_batch, sess), sess)\n",
    "                y_i = [r_batch[k] + critic.gamma * target_q[k] for k in range(batch_size)]\n",
    "\n",
    "                # Update the critic given the targets\n",
    "                critic.train(\n",
    "                    s_batch, a_batch, np.reshape(y_i, (batch_size, 1)), step, sess)\n",
    "\n",
    "                # Update the actor policy using the sampled gradient\n",
    "                a_outs = actor.predict(s_batch, sess)\n",
    "                action_grads = critic.action_gradients(s_batch, a_outs, sess)\n",
    "                actor.train(s_batch, action_grads[0], step, sess)\n",
    "\n",
    "                # Update target networks\n",
    "                actor.update_target_network(sess)\n",
    "                critic.update_target_network(sess)\n",
    "\n",
    "            s = s2\n",
    "            s_mask = s2_mask\n",
    "\n",
    "            reward_list += [r[0]]\n",
    "\n",
    "            if EXPLORATION_RATE > rate_floor:\n",
    "                EXPLORATION_RATE = EXPLORATION_RATE * rate_decay\n",
    "            if GUIDE_RATE > rate_floor:\n",
    "                GUIDE_RATE = GUIDE_RATE * rate_decay\n",
    "\n",
    "            if step % display_step == 0 and step > 0:\n",
    "                # One pass over the test set yields accuracy and the scores for AUC / AP.\n",
    "                loss, acc, train_acc, test_scores = sess.run([loss_mean, final_accuracy, train_accuracy, final_score], feed_dict = {input_train_holder:data_in, label_train_holder:label_in})\n",
    "                auc = roc_auc_score(data_label_test, test_scores)\n",
    "                ap = average_precision_score(data_label_test, test_scores)\n",
    "                if np.mean(reward_list[-display_step:]) >= rl_reward_thres_for_decay:\n",
    "                    actor.decay_learning_rate(0.965, sess)\n",
    "                    critic.decay_learning_rate(0.965, sess)\n",
    "\n",
    "                if acc > max_acc:\n",
    "                    max_acc = acc\n",
    "                    max_auc = auc\n",
    "                    max_ap = ap\n",
    "                    sess.run(assign_max_final_acc)\n",
    "                    saver.save(sess, \"./saved_model/\"+file_appendix+\"/best.ckpt\")\n",
    "\n",
    "                best_acc = sess.run(max_final_acc)\n",
    "                progress = \"Step \" + str(step) + \", Reward=\" + str(np.sum(reward_list[-display_step:])) + \\\n",
    "                      \", Minibatch Loss= \" + \\\n",
    "                      \"{:.4f}\".format(loss) + \", Training Accuracy= \" + \\\n",
    "                      \"{:.3f}\".format(train_acc) + \", Testing Acc= \" + \"{:3f}\".format(acc)\n",
    "                # The doubled spaces reproduce the separators of the former\n",
    "                # comma-separated print statement.\n",
    "                print(progress + \", Max Final Accuracy=  \" + \"{:6f}\".format(best_acc) +\n",
    "                      \", Max AUC=  \" + \"{:6f}\".format(max_auc) +\n",
    "                      \", Max AP=  \" + \"{:6f}\".format(max_ap))\n",
    "                with open(\"./stats/rl_log/\" + file_appendix + \".txt\", \"a\") as myfile:\n",
    "                    myfile.write(progress + \", Max Final Accuracy= \" + \"{:6f}\".format(best_acc) + \", Exploration= \" + \"{:6f}\".format(EXPLORATION_RATE) + \", Guide= \" + \"{:6f}\".format(GUIDE_RATE) + \"\\n\")\n",
    "        print(\"Optimization Finished!\")\n",
    "\n",
    "        best_acc = sess.run(max_final_acc)\n",
    "        print(\"Testing Accuracy: \" + str(best_acc))\n",
    "        with open(\"./stats/Contrastive_CFP_OCT_MissingData_35_missing_by_modality_more_demo_missing_FCRL_maskGradients.txt\", \"a\") as myfile:\n",
    "            myfile.write(\"%.9f\\t%i\\t%.3f\\t%i\\t%i\\t%.9f\\t%.9f\\t%.6f\\t%.6f\\t%.6f\\n\" %(start_learning_rate, decay_step, decay_rate, weights[0], weights[1], rl_actor, rl_critic, best_acc, max_auc, max_ap))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "W0524 17:49:58.515624 139654914025280 deprecation.py:323] From <ipython-input-19-23bb03a2aa10>:79: make_one_shot_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.\n",
      "W0524 17:49:58.515624 139654914025280 deprecation.py:323] From <ipython-input-19-23bb03a2aa10>:79: make_one_shot_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.\n",
      "W0524 17:49:58.571984 139654914025280 deprecation.py:323] From /home/gaoqitong/anaconda2/lib/python2.7/site-packages/tensorflow_core/contrib/layers/python/layers/layers.py:1866: apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `layer.__call__` method instead.\n",
      "W0524 17:49:58.571990 139654914025280 deprecation.py:323] From /home/gaoqitong/anaconda2/lib/python2.7/site-packages/tensorflow_core/contrib/layers/python/layers/layers.py:1866: apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `layer.__call__` method instead.\n",
      "W0524 17:49:58.698106 139654914025280 deprecation.py:323] From <ipython-input-19-23bb03a2aa10>:96: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "\n",
      "Future major versions of TensorFlow will allow gradients to flow\n",
      "into the labels input on backprop by default.\n",
      "\n",
      "See `tf.nn.softmax_cross_entropy_with_logits_v2`.\n",
      "\n",
      "W0524 17:49:58.698101 139654914025280 deprecation.py:323] From <ipython-input-19-23bb03a2aa10>:96: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "\n",
      "Future major versions of TensorFlow will allow gradients to flow\n",
      "into the labels input on backprop by default.\n",
      "\n",
      "See `tf.nn.softmax_cross_entropy_with_logits_v2`.\n",
      "\n",
      "W0524 17:49:59.864281 139654914025280 deprecation.py:323] From ../qnetwork.py:37: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Deprecated in favor of operator or tf.math.divide.\n",
      "W0524 17:49:59.868046 139654914025280 deprecation.py:323] From ../qnetwork.py:37: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Deprecated in favor of operator or tf.math.divide.\n",
      "W0524 17:49:59.950947 139654914025280 module_wrapper.py:139] From ../qnetwork.py:47: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.\n",
      "\n",
      "W0524 17:49:59.953512 139654914025280 module_wrapper.py:139] From ../qnetwork.py:47: The name tf.train.Saver is deprecated. Please use tf.compat.v1.train.Saver instead.\n",
      "\n",
      "W0524 17:50:00.542761 139654914025280 module_wrapper.py:139] From ../qnetwork.py:137: The name tf.losses.mean_squared_error is deprecated. Please use tf.compat.v1.losses.mean_squared_error instead.\n",
      "\n",
      "W0524 17:50:00.544727 139654914025280 module_wrapper.py:139] From ../qnetwork.py:137: The name tf.losses.mean_squared_error is deprecated. Please use tf.compat.v1.losses.mean_squared_error instead.\n",
      "\n",
      "W0524 17:50:00.553045 139654914025280 deprecation.py:323] From /home/gaoqitong/anaconda2/lib/python2.7/site-packages/tensorflow_core/python/ops/losses/losses_impl.py:121: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use tf.where in 2.0, which has the same broadcast rule as np.where\n",
      "W0524 17:50:00.554867 139654914025280 deprecation.py:323] From /home/gaoqitong/anaconda2/lib/python2.7/site-packages/tensorflow_core/python/ops/losses/losses_impl.py:121: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use tf.where in 2.0, which has the same broadcast rule as np.where\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 10, Reward=-41.072407, Minibatch Loss= 1.0469, Training Accuracy= 0.656, Testing Acc= 0.640351, Max Final Accuracy=  0.640351, Max AUC=  0.764851, Max AP=  0.824902\n",
      "Step 10, Reward=-41.072414, Minibatch Loss= 1.0469, Training Accuracy= 0.656, Testing Acc= 0.640351, Max Final Accuracy=  0.640351, Max AUC=  0.764851, Max AP=  0.824902\n",
      "Step 20, Reward=-53.069187, Minibatch Loss= 0.7551, Training Accuracy= 0.609, Testing Acc= 0.578947, Max Final Accuracy=  0.640351, Max AUC=  0.764851, Max AP=  0.824902\n",
      "Step 20, Reward=-53.06811, Minibatch Loss= 0.7552, Training Accuracy= 0.609, Testing Acc= 0.578947, Max Final Accuracy=  0.640351, Max AUC=  0.764851, Max AP=  0.824902\n",
      "Step 30, Reward=-46.20962, Minibatch Loss= 0.6444, Training Accuracy= 0.797, Testing Acc= 0.631579, Max Final Accuracy=  0.640351, Max AUC=  0.764851, Max AP=  0.824902\n",
      "Step 30, Reward=-46.191536, Minibatch Loss= 0.6441, Training Accuracy= 0.797, Testing Acc= 0.640351, Max Final Accuracy=  0.640351, Max AUC=  0.764851, Max AP=  0.824902\n",
      "Step 40, Reward=-32.15292, Minibatch Loss= 0.5835, Training Accuracy= 0.773, Testing Acc= 0.614035, Max Final Accuracy=  0.640351, Max AUC=  0.764851, Max AP=  0.824902\n",
      "Step 40, Reward=-32.093258, Minibatch Loss= 0.5830, Training Accuracy= 0.773, Testing Acc= 0.614035, Max Final Accuracy=  0.640351, Max AUC=  0.764851, Max AP=  0.824902\n",
      "Step 50, Reward=-42.459297, Minibatch Loss= 0.6385, Training Accuracy= 0.758, Testing Acc= 0.754386, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 50, Reward=-42.799484, Minibatch Loss= 0.6413, Training Accuracy= 0.766, Testing Acc= 0.763158, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 60, Reward=-38.664726, Minibatch Loss= 0.6513, Training Accuracy= 0.727, Testing Acc= 0.543860, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 60, Reward=-39.187435, Minibatch Loss= 0.6557, Training Accuracy= 0.711, Testing Acc= 0.596491, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 70, Reward=-37.42587, Minibatch Loss= 0.7233, Training Accuracy= 0.773, Testing Acc= 0.754386, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 70, Reward=-37.575462, Minibatch Loss= 0.6890, Training Accuracy= 0.812, Testing Acc= 0.587719, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 80, Reward=-27.474478, Minibatch Loss= 0.6160, Training Accuracy= 0.812, Testing Acc= 0.526316, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 80, Reward=-28.110691, Minibatch Loss= 0.6045, Training Accuracy= 0.805, Testing Acc= 0.500000, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 90, Reward=-33.009083, Minibatch Loss= 0.6412, Training Accuracy= 0.797, Testing Acc= 0.745614, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 90, Reward=-31.901283, Minibatch Loss= 0.5948, Training Accuracy= 0.867, Testing Acc= 0.736842, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 100, Reward=-31.500011, Minibatch Loss= 0.6390, Training Accuracy= 0.805, Testing Acc= 0.605263, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 100, Reward=-28.83702, Minibatch Loss= 0.6164, Training Accuracy= 0.859, Testing Acc= 0.657895, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 110, Reward=-32.827972, Minibatch Loss= 0.7412, Training Accuracy= 0.773, Testing Acc= 0.552632, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 110, Reward=-32.88594, Minibatch Loss= 0.6824, Training Accuracy= 0.797, Testing Acc= 0.640351, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 120, Reward=-26.08897, Minibatch Loss= 0.6019, Training Accuracy= 0.891, Testing Acc= 0.517544, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 120, Reward=-26.36701, Minibatch Loss= 0.6393, Training Accuracy= 0.812, Testing Acc= 0.508772, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 130, Reward=-23.34547, Minibatch Loss= 0.6712, Training Accuracy= 0.844, Testing Acc= 0.649123, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 130, Reward=-19.635534, Minibatch Loss= 0.6098, Training Accuracy= 0.859, Testing Acc= 0.631579, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 140, Reward=-25.193127, Minibatch Loss= 0.7859, Training Accuracy= 0.734, Testing Acc= 0.657895, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 140, Reward=-21.332407, Minibatch Loss= 0.6628, Training Accuracy= 0.812, Testing Acc= 0.596491, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 150, Reward=-26.280058, Minibatch Loss= 0.6144, Training Accuracy= 0.836, Testing Acc= 0.561404, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 150, Reward=-25.85795, Minibatch Loss= 0.6071, Training Accuracy= 0.844, Testing Acc= 0.526316, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 160, Reward=-16.808933, Minibatch Loss= 0.7346, Training Accuracy= 0.742, Testing Acc= 0.517544, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 160, Reward=-18.081896, Minibatch Loss= 0.8172, Training Accuracy= 0.711, Testing Acc= 0.526316, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 170, Reward=-35.205025, Minibatch Loss= 0.6434, Training Accuracy= 0.781, Testing Acc= 0.552632, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 170, Reward=-34.536797, Minibatch Loss= 0.6072, Training Accuracy= 0.852, Testing Acc= 0.535088, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 180, Reward=-28.399254, Minibatch Loss= 0.7308, Training Accuracy= 0.773, Testing Acc= 0.710526, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 180, Reward=-25.789722, Minibatch Loss= 0.7674, Training Accuracy= 0.742, Testing Acc= 0.517544, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 190, Reward=-28.299667, Minibatch Loss= 0.7612, Training Accuracy= 0.766, Testing Acc= 0.692982, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 190, Reward=-30.886368, Minibatch Loss= 0.7907, Training Accuracy= 0.766, Testing Acc= 0.631579, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 200, Reward=-32.781242, Minibatch Loss= 0.6666, Training Accuracy= 0.781, Testing Acc= 0.543860, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 200, Reward=-32.818336, Minibatch Loss= 0.6042, Training Accuracy= 0.844, Testing Acc= 0.517544, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 210, Reward=-27.20834, Minibatch Loss= 0.6218, Training Accuracy= 0.875, Testing Acc= 0.710526, Max Final Accuracy=  0.754386, Max AUC=  0.879194, Max AP=  0.902823\n",
      "Step 210, Reward=-24.26283, Minibatch Loss= 0.6220, Training Accuracy= 0.820, Testing Acc= 0.675439, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 220, Reward=-19.858006, Minibatch Loss= 0.5659, Training Accuracy= 0.906, Testing Acc= 0.780702, Max Final Accuracy=  0.780702, Max AUC=  0.832410, Max AP=  0.856131\n",
      "Step 220, Reward=-20.023897, Minibatch Loss= 0.5131, Training Accuracy= 0.914, Testing Acc= 0.526316, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 230, Reward=-28.656713, Minibatch Loss= 0.7199, Training Accuracy= 0.805, Testing Acc= 0.763158, Max Final Accuracy=  0.780702, Max AUC=  0.832410, Max AP=  0.856131\n",
      "Step 230, Reward=-27.76876, Minibatch Loss= 0.7509, Training Accuracy= 0.789, Testing Acc= 0.692982, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 240, Reward=-31.148155, Minibatch Loss= 0.6328, Training Accuracy= 0.883, Testing Acc= 0.543860, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 240, Reward=-30.230938, Minibatch Loss= 0.6509, Training Accuracy= 0.812, Testing Acc= 0.798246, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 250, Reward=-23.387367, Minibatch Loss= 0.6052, Training Accuracy= 0.844, Testing Acc= 0.605263, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 250, Reward=-25.599047, Minibatch Loss= 0.6274, Training Accuracy= 0.844, Testing Acc= 0.736842, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 260, Reward=-27.864746, Minibatch Loss= 0.6377, Training Accuracy= 0.852, Testing Acc= 0.701754, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 260, Reward=-28.475931, Minibatch Loss= 0.6045, Training Accuracy= 0.891, Testing Acc= 0.657895, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 270, Reward=-24.096325, Minibatch Loss= 0.5611, Training Accuracy= 0.867, Testing Acc= 0.578947, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 270, Reward=-23.857067, Minibatch Loss= 0.5385, Training Accuracy= 0.859, Testing Acc= 0.736842, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 280, Reward=-28.647318, Minibatch Loss= 0.6179, Training Accuracy= 0.852, Testing Acc= 0.552632, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 280, Reward=-28.488834, Minibatch Loss= 0.6231, Training Accuracy= 0.852, Testing Acc= 0.561404, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 290, Reward=-26.058004, Minibatch Loss= 0.7214, Training Accuracy= 0.820, Testing Acc= 0.543860, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 290, Reward=-28.666367, Minibatch Loss= 0.8407, Training Accuracy= 0.797, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 300, Reward=-19.00285, Minibatch Loss= 0.6266, Training Accuracy= 0.922, Testing Acc= 0.710526, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 300, Reward=-23.00026, Minibatch Loss= 0.6367, Training Accuracy= 0.859, Testing Acc= 0.763158, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 310, Reward=-17.578905, Minibatch Loss= 0.6994, Training Accuracy= 0.773, Testing Acc= 0.631579, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 310, Reward=-18.703651, Minibatch Loss= 0.6491, Training Accuracy= 0.812, Testing Acc= 0.535088, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 320, Reward=-26.810446, Minibatch Loss= 0.7721, Training Accuracy= 0.758, Testing Acc= 0.587719, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 320, Reward=-27.229458, Minibatch Loss= 0.7629, Training Accuracy= 0.750, Testing Acc= 0.578947, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 330, Reward=-23.5887, Minibatch Loss= 0.7627, Training Accuracy= 0.828, Testing Acc= 0.622807, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 330, Reward=-25.795809, Minibatch Loss= 0.5488, Training Accuracy= 0.875, Testing Acc= 0.614035, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 340, Reward=-26.346464, Minibatch Loss= 0.6437, Training Accuracy= 0.844, Testing Acc= 0.526316, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 340, Reward=-21.393562, Minibatch Loss= 0.5741, Training Accuracy= 0.852, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 350, Reward=-24.361124, Minibatch Loss= 0.6825, Training Accuracy= 0.836, Testing Acc= 0.517544, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 350, Reward=-22.13681, Minibatch Loss= 0.6032, Training Accuracy= 0.875, Testing Acc= 0.543860, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 360, Reward=-26.69798, Minibatch Loss= 0.5770, Training Accuracy= 0.859, Testing Acc= 0.517544, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 360, Reward=-21.331501, Minibatch Loss= 0.5478, Training Accuracy= 0.883, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 370, Reward=-19.43175, Minibatch Loss= 0.5897, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 370, Reward=-24.040718, Minibatch Loss= 0.6520, Training Accuracy= 0.812, Testing Acc= 0.622807, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 380, Reward=-29.152264, Minibatch Loss= 0.5845, Training Accuracy= 0.875, Testing Acc= 0.605263, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 380, Reward=-35.61863, Minibatch Loss= 0.6769, Training Accuracy= 0.852, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 390, Reward=-22.537933, Minibatch Loss= 0.5694, Training Accuracy= 0.875, Testing Acc= 0.596491, Max Final Accuracy=  0.763158, Max AUC=  0.865035, Max AP=  0.892358\n",
      "Step 390, Reward=-25.024052, Minibatch Loss= 0.6170, Training Accuracy= 0.891, Testing Acc= 0.535088, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 400, Reward=-26.84056, Minibatch Loss= 0.5529, Training Accuracy= 0.859, Testing Acc= 0.789474, Max Final Accuracy=  0.789474, Max AUC=  0.835796, Max AP=  0.870512\n",
      "Step 400, Reward=-27.127525, Minibatch Loss= 0.5541, Training Accuracy= 0.891, Testing Acc= 0.745614, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 410, Reward=-30.716467, Minibatch Loss= 0.7152, Training Accuracy= 0.789, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.835796, Max AP=  0.870512\n",
      "Step 410, Reward=-30.87084, Minibatch Loss= 0.6326, Training Accuracy= 0.867, Testing Acc= 0.701754, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 420, Reward=-21.023895, Minibatch Loss= 0.6561, Training Accuracy= 0.828, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.835796, Max AP=  0.870512\n",
      "Step 420, Reward=-26.902695, Minibatch Loss= 0.7755, Training Accuracy= 0.812, Testing Acc= 0.745614, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 430, Reward=-18.348316, Minibatch Loss= 0.6154, Training Accuracy= 0.820, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.835796, Max AP=  0.870512\n",
      "Step 430, Reward=-24.836348, Minibatch Loss= 0.7709, Training Accuracy= 0.820, Testing Acc= 0.763158, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 440, Reward=-27.981018, Minibatch Loss= 0.7336, Training Accuracy= 0.828, Testing Acc= 0.763158, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 440, Reward=-19.810461, Minibatch Loss= 0.5808, Training Accuracy= 0.867, Testing Acc= 0.798246, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 450, Reward=-28.604717, Minibatch Loss= 0.8507, Training Accuracy= 0.773, Testing Acc= 0.561404, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 450, Reward=-18.286098, Minibatch Loss= 0.6889, Training Accuracy= 0.781, Testing Acc= 0.640351, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 460, Reward=-28.417295, Minibatch Loss= 0.6536, Training Accuracy= 0.836, Testing Acc= 0.666667, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 460, Reward=-25.019835, Minibatch Loss= 0.6103, Training Accuracy= 0.820, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 470, Reward=-20.660381, Minibatch Loss= 0.5923, Training Accuracy= 0.875, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 470, Reward=-18.572245, Minibatch Loss= 0.6250, Training Accuracy= 0.844, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 480, Reward=-22.790434, Minibatch Loss= 0.6236, Training Accuracy= 0.852, Testing Acc= 0.649123, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 480, Reward=-26.115093, Minibatch Loss= 0.6287, Training Accuracy= 0.891, Testing Acc= 0.736842, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 490, Reward=-15.375965, Minibatch Loss= 0.5372, Training Accuracy= 0.875, Testing Acc= 0.561404, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 490, Reward=-16.247454, Minibatch Loss= 0.6507, Training Accuracy= 0.836, Testing Acc= 0.719298, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 500, Reward=-23.658102, Minibatch Loss= 0.6925, Training Accuracy= 0.820, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 500, Reward=-23.865498, Minibatch Loss= 0.6586, Training Accuracy= 0.852, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 510, Reward=-21.05756, Minibatch Loss= 0.5855, Training Accuracy= 0.891, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 510, Reward=-19.757378, Minibatch Loss= 0.5866, Training Accuracy= 0.883, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 520, Reward=-20.53944, Minibatch Loss= 0.6225, Training Accuracy= 0.859, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 520, Reward=-21.671532, Minibatch Loss= 0.5988, Training Accuracy= 0.859, Testing Acc= 0.535088, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 530, Reward=-17.742655, Minibatch Loss= 0.7414, Training Accuracy= 0.820, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 530, Reward=-20.019882, Minibatch Loss= 0.6149, Training Accuracy= 0.859, Testing Acc= 0.622807, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 540, Reward=-27.178253, Minibatch Loss= 0.7458, Training Accuracy= 0.828, Testing Acc= 0.763158, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 540, Reward=-20.265509, Minibatch Loss= 0.6659, Training Accuracy= 0.828, Testing Acc= 0.745614, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 550, Reward=-25.074793, Minibatch Loss= 0.6711, Training Accuracy= 0.844, Testing Acc= 0.561404, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 550, Reward=-23.141693, Minibatch Loss= 0.6766, Training Accuracy= 0.836, Testing Acc= 0.587719, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 560, Reward=-25.458046, Minibatch Loss= 0.7562, Training Accuracy= 0.781, Testing Acc= 0.754386, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 560, Reward=-29.366901, Minibatch Loss= 0.7087, Training Accuracy= 0.797, Testing Acc= 0.622807, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 570, Reward=-26.752125, Minibatch Loss= 0.5614, Training Accuracy= 0.938, Testing Acc= 0.578947, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 570, Reward=-24.099813, Minibatch Loss= 0.5105, Training Accuracy= 0.930, Testing Acc= 0.657895, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 580, Reward=-20.927258, Minibatch Loss= 0.7005, Training Accuracy= 0.766, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 580, Reward=-20.67317, Minibatch Loss= 0.6177, Training Accuracy= 0.844, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 590, Reward=-24.206772, Minibatch Loss= 0.6293, Training Accuracy= 0.852, Testing Acc= 0.614035, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 590, Reward=-18.326563, Minibatch Loss= 0.6864, Training Accuracy= 0.844, Testing Acc= 0.596491, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 600, Reward=-21.11343, Minibatch Loss= 0.7454, Training Accuracy= 0.750, Testing Acc= 0.657895, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 600, Reward=-24.617197, Minibatch Loss= 0.7719, Training Accuracy= 0.781, Testing Acc= 0.692982, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 610, Reward=-19.493769, Minibatch Loss= 0.6444, Training Accuracy= 0.867, Testing Acc= 0.763158, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 610, Reward=-19.935596, Minibatch Loss= 0.6523, Training Accuracy= 0.859, Testing Acc= 0.657895, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 620, Reward=-28.923943, Minibatch Loss= 0.6971, Training Accuracy= 0.781, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 620, Reward=-27.984108, Minibatch Loss= 0.6316, Training Accuracy= 0.852, Testing Acc= 0.666667, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 630, Reward=-22.480974, Minibatch Loss= 0.6357, Training Accuracy= 0.852, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 630, Reward=-16.385725, Minibatch Loss= 0.5289, Training Accuracy= 0.891, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 640, Reward=-19.01624, Minibatch Loss= 0.5891, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 640, Reward=-15.376969, Minibatch Loss= 0.5675, Training Accuracy= 0.891, Testing Acc= 0.614035, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 650, Reward=-22.757698, Minibatch Loss= 0.5327, Training Accuracy= 0.883, Testing Acc= 0.701754, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 650, Reward=-21.2649, Minibatch Loss= 0.4633, Training Accuracy= 0.922, Testing Acc= 0.692982, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 660, Reward=-22.77966, Minibatch Loss= 0.6325, Training Accuracy= 0.867, Testing Acc= 0.798246, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 660, Reward=-18.201714, Minibatch Loss= 0.4488, Training Accuracy= 0.906, Testing Acc= 0.675439, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 670, Reward=-23.67343, Minibatch Loss= 0.6183, Training Accuracy= 0.875, Testing Acc= 0.798246, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 670, Reward=-25.745338, Minibatch Loss= 0.6395, Training Accuracy= 0.859, Testing Acc= 0.692982, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 680, Reward=-23.19235, Minibatch Loss= 0.7438, Training Accuracy= 0.820, Testing Acc= 0.789474, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 680, Reward=-22.40897, Minibatch Loss= 0.7782, Training Accuracy= 0.797, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 690, Reward=-28.396984, Minibatch Loss= 0.8357, Training Accuracy= 0.773, Testing Acc= 0.587719, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 690, Reward=-27.591667, Minibatch Loss= 0.8123, Training Accuracy= 0.797, Testing Acc= 0.649123, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 700, Reward=-23.25147, Minibatch Loss= 0.8078, Training Accuracy= 0.883, Testing Acc= 0.622807, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 700, Reward=-15.866611, Minibatch Loss= 0.6515, Training Accuracy= 0.875, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 710, Reward=-29.268513, Minibatch Loss= 0.7093, Training Accuracy= 0.859, Testing Acc= 0.736842, Max Final Accuracy=  0.798246, Max AUC=  0.880117, Max AP=  0.899613\n",
      "Step 710, Reward=-19.543045, Minibatch Loss= 0.6393, Training Accuracy= 0.844, Testing Acc= 0.701754, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 720, Reward=-24.895605, Minibatch Loss= 0.6925, Training Accuracy= 0.859, Testing Acc= 0.815789, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 720, Reward=-21.517649, Minibatch Loss= 0.6554, Training Accuracy= 0.852, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 730, Reward=-22.874119, Minibatch Loss= 0.6243, Training Accuracy= 0.875, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 730, Reward=-24.24445, Minibatch Loss= 0.5543, Training Accuracy= 0.922, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 740, Reward=-15.185156, Minibatch Loss= 0.6666, Training Accuracy= 0.852, Testing Acc= 0.798246, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 740, Reward=-11.625513, Minibatch Loss= 0.5446, Training Accuracy= 0.906, Testing Acc= 0.543860, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 750, Reward=-27.967213, Minibatch Loss= 0.6005, Training Accuracy= 0.875, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 750, Reward=-21.108765, Minibatch Loss= 0.5262, Training Accuracy= 0.906, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 760, Reward=-18.403477, Minibatch Loss= 0.7184, Training Accuracy= 0.781, Testing Acc= 0.789474, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 760, Reward=-15.963989, Minibatch Loss= 0.8000, Training Accuracy= 0.820, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 770, Reward=-25.254364, Minibatch Loss= 0.6055, Training Accuracy= 0.844, Testing Acc= 0.815789, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 770, Reward=-23.369865, Minibatch Loss= 0.6514, Training Accuracy= 0.836, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 780, Reward=-26.751823, Minibatch Loss= 0.8020, Training Accuracy= 0.750, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 780, Reward=-24.424603, Minibatch Loss= 0.6089, Training Accuracy= 0.875, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 790, Reward=-32.513115, Minibatch Loss= 0.6494, Training Accuracy= 0.867, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 790, Reward=-25.261366, Minibatch Loss= 0.5336, Training Accuracy= 0.914, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 800, Reward=-18.758377, Minibatch Loss= 0.6073, Training Accuracy= 0.891, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 800, Reward=-13.992447, Minibatch Loss= 0.5185, Training Accuracy= 0.875, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 810, Reward=-18.40371, Minibatch Loss= 0.6306, Training Accuracy= 0.844, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 810, Reward=-14.529812, Minibatch Loss= 0.5381, Training Accuracy= 0.828, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 820, Reward=-26.374306, Minibatch Loss= 0.6581, Training Accuracy= 0.812, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 820, Reward=-19.500021, Minibatch Loss= 0.4990, Training Accuracy= 0.859, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 830, Reward=-27.018, Minibatch Loss= 0.5507, Training Accuracy= 0.883, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 830, Reward=-20.483467, Minibatch Loss= 0.5636, Training Accuracy= 0.875, Testing Acc= 0.649123, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 840, Reward=-20.257263, Minibatch Loss= 0.6182, Training Accuracy= 0.867, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 840, Reward=-17.295527, Minibatch Loss= 0.5116, Training Accuracy= 0.914, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 850, Reward=-25.476976, Minibatch Loss= 0.5941, Training Accuracy= 0.891, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 850, Reward=-20.516468, Minibatch Loss= 0.4723, Training Accuracy= 0.906, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 860, Reward=-21.940872, Minibatch Loss= 0.6194, Training Accuracy= 0.852, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 860, Reward=-19.976128, Minibatch Loss= 0.6273, Training Accuracy= 0.828, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 870, Reward=-23.700842, Minibatch Loss= 0.5504, Training Accuracy= 0.883, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 870, Reward=-22.678024, Minibatch Loss= 0.5809, Training Accuracy= 0.852, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 880, Reward=-21.695827, Minibatch Loss= 0.4831, Training Accuracy= 0.891, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 880, Reward=-24.438614, Minibatch Loss= 0.5107, Training Accuracy= 0.891, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 890, Reward=-17.962925, Minibatch Loss= 0.5936, Training Accuracy= 0.867, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 890, Reward=-23.529263, Minibatch Loss= 0.5793, Training Accuracy= 0.852, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 900, Reward=-28.431969, Minibatch Loss= 0.7199, Training Accuracy= 0.805, Testing Acc= 0.807018, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 900, Reward=-26.80937, Minibatch Loss= 0.5272, Training Accuracy= 0.836, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 910, Reward=-26.587763, Minibatch Loss= 0.6012, Training Accuracy= 0.867, Testing Acc= 0.798246, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 910, Reward=-23.954697, Minibatch Loss= 0.5015, Training Accuracy= 0.906, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 920, Reward=-18.941149, Minibatch Loss= 0.6183, Training Accuracy= 0.875, Testing Acc= 0.780702, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 920, Reward=-15.24518, Minibatch Loss= 0.4753, Training Accuracy= 0.906, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 930, Reward=-21.535378, Minibatch Loss= 0.5535, Training Accuracy= 0.875, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 930, Reward=-15.511561, Minibatch Loss= 0.4742, Training Accuracy= 0.891, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 940, Reward=-29.303171, Minibatch Loss= 0.6191, Training Accuracy= 0.867, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 940, Reward=-23.245493, Minibatch Loss= 0.4866, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 950, Reward=-29.885498, Minibatch Loss= 0.8840, Training Accuracy= 0.734, Testing Acc= 0.771930, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 950, Reward=-11.63589, Minibatch Loss= 0.5765, Training Accuracy= 0.859, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 960, Reward=-27.220032, Minibatch Loss= 0.7640, Training Accuracy= 0.836, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 960, Reward=-13.786436, Minibatch Loss= 0.6757, Training Accuracy= 0.852, Testing Acc= 0.543860, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 970, Reward=-25.83447, Minibatch Loss= 0.6891, Training Accuracy= 0.867, Testing Acc= 0.719298, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 970, Reward=-18.859764, Minibatch Loss= 0.5079, Training Accuracy= 0.875, Testing Acc= 0.561404, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 980, Reward=-21.924755, Minibatch Loss= 0.5546, Training Accuracy= 0.883, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 980, Reward=-21.747467, Minibatch Loss= 0.4446, Training Accuracy= 0.906, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 990, Reward=-20.109179, Minibatch Loss= 0.6429, Training Accuracy= 0.852, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 990, Reward=-16.533752, Minibatch Loss= 0.5920, Training Accuracy= 0.844, Testing Acc= 0.640351, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1000, Reward=-13.74307, Minibatch Loss= 0.5129, Training Accuracy= 0.891, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1000, Reward=-11.192562, Minibatch Loss= 0.5401, Training Accuracy= 0.852, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1010, Reward=-26.096405, Minibatch Loss= 0.7406, Training Accuracy= 0.789, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1010, Reward=-23.395744, Minibatch Loss= 0.8817, Training Accuracy= 0.789, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1020, Reward=-20.219807, Minibatch Loss= 0.6173, Training Accuracy= 0.820, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1020, Reward=-26.629286, Minibatch Loss= 0.7096, Training Accuracy= 0.828, Testing Acc= 0.456140, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1030, Reward=-21.14649, Minibatch Loss= 0.5384, Training Accuracy= 0.883, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1030, Reward=-25.139215, Minibatch Loss= 0.6588, Training Accuracy= 0.836, Testing Acc= 0.456140, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1040, Reward=-17.098936, Minibatch Loss= 0.6076, Training Accuracy= 0.812, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1040, Reward=-18.800713, Minibatch Loss= 0.6650, Training Accuracy= 0.812, Testing Acc= 0.447368, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1050, Reward=-18.317385, Minibatch Loss= 0.5676, Training Accuracy= 0.883, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1050, Reward=-20.770355, Minibatch Loss= 0.5824, Training Accuracy= 0.898, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1060, Reward=-12.936328, Minibatch Loss= 0.5100, Training Accuracy= 0.859, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1060, Reward=-18.462437, Minibatch Loss= 0.6286, Training Accuracy= 0.820, Testing Acc= 0.464912, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1070, Reward=-23.466011, Minibatch Loss= 0.4730, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1070, Reward=-28.93881, Minibatch Loss= 0.6097, Training Accuracy= 0.852, Testing Acc= 0.543860, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1080, Reward=-20.780815, Minibatch Loss= 0.4850, Training Accuracy= 0.875, Testing Acc= 0.692982, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1080, Reward=-26.408304, Minibatch Loss= 0.7023, Training Accuracy= 0.812, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1090, Reward=-21.6375, Minibatch Loss= 0.4725, Training Accuracy= 0.891, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1090, Reward=-26.958263, Minibatch Loss= 0.5398, Training Accuracy= 0.914, Testing Acc= 0.710526, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1100, Reward=-19.890835, Minibatch Loss= 0.4256, Training Accuracy= 0.945, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1100, Reward=-20.88052, Minibatch Loss= 0.4382, Training Accuracy= 0.953, Testing Acc= 0.710526, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1110, Reward=-20.316977, Minibatch Loss= 0.5991, Training Accuracy= 0.828, Testing Acc= 0.719298, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1110, Reward=-17.481943, Minibatch Loss= 0.5706, Training Accuracy= 0.891, Testing Acc= 0.719298, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1120, Reward=-24.725456, Minibatch Loss= 0.7874, Training Accuracy= 0.914, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1120, Reward=-18.80012, Minibatch Loss= 0.6298, Training Accuracy= 0.859, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1130, Reward=-25.525732, Minibatch Loss= 0.8222, Training Accuracy= 0.805, Testing Acc= 0.701754, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1130, Reward=-17.927616, Minibatch Loss= 0.5900, Training Accuracy= 0.867, Testing Acc= 0.578947, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1140, Reward=-31.294495, Minibatch Loss= 0.6343, Training Accuracy= 0.891, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1140, Reward=-24.640364, Minibatch Loss= 0.5485, Training Accuracy= 0.906, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1150, Reward=-16.875723, Minibatch Loss= 0.5451, Training Accuracy= 0.883, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1150, Reward=-9.701949, Minibatch Loss= 0.4754, Training Accuracy= 0.906, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1160, Reward=-15.834867, Minibatch Loss= 0.5458, Training Accuracy= 0.914, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1160, Reward=-9.160315, Minibatch Loss= 0.4524, Training Accuracy= 0.891, Testing Acc= 0.596491, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1170, Reward=-22.727503, Minibatch Loss= 0.5666, Training Accuracy= 0.883, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1170, Reward=-19.520897, Minibatch Loss= 0.4797, Training Accuracy= 0.875, Testing Acc= 0.578947, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1180, Reward=-17.24245, Minibatch Loss= 0.5832, Training Accuracy= 0.883, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1180, Reward=-18.054514, Minibatch Loss= 0.4381, Training Accuracy= 0.914, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1190, Reward=-19.544989, Minibatch Loss= 0.5692, Training Accuracy= 0.859, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1190, Reward=-17.116337, Minibatch Loss= 0.5879, Training Accuracy= 0.844, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1200, Reward=-23.20903, Minibatch Loss= 0.5950, Training Accuracy= 0.867, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1200, Reward=-20.423115, Minibatch Loss= 0.5086, Training Accuracy= 0.898, Testing Acc= 0.666667, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1210, Reward=-24.453705, Minibatch Loss= 0.6541, Training Accuracy= 0.773, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1210, Reward=-14.437017, Minibatch Loss= 0.5081, Training Accuracy= 0.859, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1220, Reward=-24.348099, Minibatch Loss= 0.6694, Training Accuracy= 0.867, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1220, Reward=-14.75437, Minibatch Loss= 0.4894, Training Accuracy= 0.852, Testing Acc= 0.578947, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1230, Reward=-14.810407, Minibatch Loss= 0.6014, Training Accuracy= 0.875, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1230, Reward=-13.464937, Minibatch Loss= 0.4913, Training Accuracy= 0.875, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1240, Reward=-18.098701, Minibatch Loss= 0.5300, Training Accuracy= 0.859, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1240, Reward=-19.325294, Minibatch Loss= 0.4810, Training Accuracy= 0.875, Testing Acc= 0.692982, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1250, Reward=-16.292831, Minibatch Loss= 0.5713, Training Accuracy= 0.891, Testing Acc= 0.701754, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1250, Reward=-18.133783, Minibatch Loss= 0.5774, Training Accuracy= 0.852, Testing Acc= 0.736842, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1260, Reward=-36.189816, Minibatch Loss= 0.6742, Training Accuracy= 0.844, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1260, Reward=-34.24762, Minibatch Loss= 0.6962, Training Accuracy= 0.844, Testing Acc= 0.719298, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1270, Reward=-25.301054, Minibatch Loss= 0.6527, Training Accuracy= 0.859, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1270, Reward=-18.020866, Minibatch Loss= 0.5509, Training Accuracy= 0.875, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1280, Reward=-31.81429, Minibatch Loss= 0.6061, Training Accuracy= 0.875, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1280, Reward=-21.534399, Minibatch Loss= 0.4502, Training Accuracy= 0.922, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1290, Reward=-23.163254, Minibatch Loss= 0.5152, Training Accuracy= 0.930, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1290, Reward=-17.288006, Minibatch Loss= 0.4591, Training Accuracy= 0.930, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1300, Reward=-22.466433, Minibatch Loss= 0.4630, Training Accuracy= 0.938, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1300, Reward=-21.65511, Minibatch Loss= 0.4732, Training Accuracy= 0.930, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1310, Reward=-16.22718, Minibatch Loss= 0.5289, Training Accuracy= 0.852, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1310, Reward=-17.289158, Minibatch Loss= 0.5494, Training Accuracy= 0.867, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1320, Reward=-14.417511, Minibatch Loss= 0.6193, Training Accuracy= 0.867, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1320, Reward=-13.200382, Minibatch Loss= 0.5575, Training Accuracy= 0.898, Testing Acc= 0.701754, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1330, Reward=-29.944977, Minibatch Loss= 0.5763, Training Accuracy= 0.930, Testing Acc= 0.763158, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1330, Reward=-26.04816, Minibatch Loss= 0.4828, Training Accuracy= 0.906, Testing Acc= 0.692982, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1340, Reward=-8.962917, Minibatch Loss= 0.7084, Training Accuracy= 0.836, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1340, Reward=-6.3233285, Minibatch Loss= 0.4701, Training Accuracy= 0.914, Testing Acc= 0.675439, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1350, Reward=-25.710361, Minibatch Loss= 0.7109, Training Accuracy= 0.852, Testing Acc= 0.780702, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1350, Reward=-17.684872, Minibatch Loss= 0.5187, Training Accuracy= 0.867, Testing Acc= 0.631579, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1360, Reward=-18.022745, Minibatch Loss= 0.6284, Training Accuracy= 0.844, Testing Acc= 0.666667, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1360, Reward=-12.906064, Minibatch Loss= 0.5208, Training Accuracy= 0.867, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1370, Reward=-16.701813, Minibatch Loss= 0.5686, Training Accuracy= 0.891, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1370, Reward=-12.635768, Minibatch Loss= 0.4185, Training Accuracy= 0.906, Testing Acc= 0.578947, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1380, Reward=-14.185377, Minibatch Loss= 0.4590, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1380, Reward=-10.452325, Minibatch Loss= 0.4019, Training Accuracy= 0.906, Testing Acc= 0.666667, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1390, Reward=-25.689184, Minibatch Loss= 0.6341, Training Accuracy= 0.812, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1400, Reward=-35.04231, Minibatch Loss= 0.7130, Training Accuracy= 0.852, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1390, Reward=-17.683802, Minibatch Loss= 0.4561, Training Accuracy= 0.844, Testing Acc= 0.587719, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1410, Reward=-24.41205, Minibatch Loss= 0.6365, Training Accuracy= 0.875, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1400, Reward=-17.64814, Minibatch Loss= 0.5620, Training Accuracy= 0.875, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1420, Reward=-18.786346, Minibatch Loss= 0.6613, Training Accuracy= 0.844, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1410, Reward=-18.956818, Minibatch Loss= 0.5413, Training Accuracy= 0.891, Testing Acc= 0.614035, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1430, Reward=-25.427147, Minibatch Loss= 0.4984, Training Accuracy= 0.945, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1420, Reward=-19.115261, Minibatch Loss= 0.8052, Training Accuracy= 0.828, Testing Acc= 0.771930, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1440, Reward=-20.95845, Minibatch Loss= 0.5851, Training Accuracy= 0.852, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1430, Reward=-24.950977, Minibatch Loss= 0.7638, Training Accuracy= 0.906, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1450, Reward=-18.50261, Minibatch Loss= 0.4723, Training Accuracy= 0.922, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1440, Reward=-25.858957, Minibatch Loss= 0.7877, Training Accuracy= 0.875, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1460, Reward=-17.984594, Minibatch Loss= 0.5860, Training Accuracy= 0.844, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1450, Reward=-23.502325, Minibatch Loss= 0.7526, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1470, Reward=-19.708002, Minibatch Loss= 0.7569, Training Accuracy= 0.883, Testing Acc= 0.710526, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1460, Reward=-25.686535, Minibatch Loss= 0.6939, Training Accuracy= 0.891, Testing Acc= 0.464912, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1480, Reward=-18.131947, Minibatch Loss= 0.6218, Training Accuracy= 0.844, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1470, Reward=-27.91198, Minibatch Loss= 1.1285, Training Accuracy= 0.805, Testing Acc= 0.640351, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1490, Reward=-25.515198, Minibatch Loss= 0.5405, Training Accuracy= 0.914, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1480, Reward=-29.17255, Minibatch Loss= 0.9588, Training Accuracy= 0.906, Testing Acc= 0.561404, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1500, Reward=-25.121115, Minibatch Loss= 0.5339, Training Accuracy= 0.898, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1490, Reward=-32.963837, Minibatch Loss= 1.0100, Training Accuracy= 0.867, Testing Acc= 0.587719, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1510, Reward=-19.432568, Minibatch Loss= 0.4951, Training Accuracy= 0.914, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1500, Reward=-25.9181, Minibatch Loss= 0.7860, Training Accuracy= 0.875, Testing Acc= 0.543860, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1520, Reward=-28.551414, Minibatch Loss= 0.7022, Training Accuracy= 0.781, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1510, Reward=-20.54389, Minibatch Loss= 0.8261, Training Accuracy= 0.844, Testing Acc= 0.535088, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1530, Reward=-33.62552, Minibatch Loss= 0.6850, Training Accuracy= 0.844, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1520, Reward=-31.176186, Minibatch Loss= 0.8731, Training Accuracy= 0.781, Testing Acc= 0.640351, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1540, Reward=-30.740028, Minibatch Loss= 0.7043, Training Accuracy= 0.766, Testing Acc= 0.745614, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1530, Reward=-26.360655, Minibatch Loss= 0.8381, Training Accuracy= 0.820, Testing Acc= 0.649123, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1550, Reward=-17.728363, Minibatch Loss= 0.6582, Training Accuracy= 0.828, Testing Acc= 0.807018, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1540, Reward=-32.9809, Minibatch Loss= 0.8487, Training Accuracy= 0.836, Testing Acc= 0.675439, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1560, Reward=-16.094868, Minibatch Loss= 0.5797, Training Accuracy= 0.875, Testing Acc= 0.798246, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1550, Reward=-21.967232, Minibatch Loss= 0.8727, Training Accuracy= 0.805, Testing Acc= 0.631579, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1570, Reward=-23.034286, Minibatch Loss= 0.6177, Training Accuracy= 0.836, Testing Acc= 0.798246, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1560, Reward=-20.428463, Minibatch Loss= 0.7870, Training Accuracy= 0.812, Testing Acc= 0.587719, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1580, Reward=-22.738272, Minibatch Loss= 0.6021, Training Accuracy= 0.883, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1570, Reward=-23.085749, Minibatch Loss= 0.8803, Training Accuracy= 0.750, Testing Acc= 0.535088, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1590, Reward=-12.47061, Minibatch Loss= 0.6031, Training Accuracy= 0.883, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1580, Reward=-31.50241, Minibatch Loss= 0.9071, Training Accuracy= 0.797, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1600, Reward=-11.875131, Minibatch Loss= 0.5449, Training Accuracy= 0.930, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1610, Reward=-15.322463, Minibatch Loss= 0.6579, Training Accuracy= 0.781, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1590, Reward=-20.602898, Minibatch Loss= 0.7709, Training Accuracy= 0.945, Testing Acc= 0.587719, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1600, Reward=-14.759659, Minibatch Loss= 0.7276, Training Accuracy= 0.914, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1620, Reward=-30.877855, Minibatch Loss= 0.6393, Training Accuracy= 0.898, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1610, Reward=-12.549738, Minibatch Loss= 0.6640, Training Accuracy= 0.930, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1630, Reward=-14.801037, Minibatch Loss= 0.6233, Training Accuracy= 0.859, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1620, Reward=-23.147083, Minibatch Loss= 0.6145, Training Accuracy= 0.898, Testing Acc= 0.596491, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1640, Reward=-11.163693, Minibatch Loss= 0.5651, Training Accuracy= 0.875, Testing Acc= 0.763158, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1630, Reward=-21.549139, Minibatch Loss= 1.0335, Training Accuracy= 0.711, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1650, Reward=-19.501305, Minibatch Loss= 0.5624, Training Accuracy= 0.883, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1660, Reward=-19.987263, Minibatch Loss= 0.5047, Training Accuracy= 0.898, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1640, Reward=-21.840704, Minibatch Loss= 0.7346, Training Accuracy= 0.828, Testing Acc= 0.535088, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1670, Reward=-16.403309, Minibatch Loss= 0.4610, Training Accuracy= 0.906, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1650, Reward=-27.309664, Minibatch Loss= 0.7404, Training Accuracy= 0.844, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1680, Reward=-15.647894, Minibatch Loss= 0.4712, Training Accuracy= 0.883, Testing Acc= 0.684211, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1660, Reward=-23.284718, Minibatch Loss= 0.6264, Training Accuracy= 0.922, Testing Acc= 0.631579, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1690, Reward=-16.598448, Minibatch Loss= 0.7870, Training Accuracy= 0.773, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1670, Reward=-19.90511, Minibatch Loss= 0.6445, Training Accuracy= 0.938, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1700, Reward=-20.533558, Minibatch Loss= 0.5501, Training Accuracy= 0.836, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1680, Reward=-18.394852, Minibatch Loss= 0.5295, Training Accuracy= 0.867, Testing Acc= 0.596491, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1710, Reward=-15.428946, Minibatch Loss= 0.4207, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1690, Reward=-17.54682, Minibatch Loss= 0.8711, Training Accuracy= 0.797, Testing Acc= 0.631579, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1720, Reward=-23.237995, Minibatch Loss= 0.4396, Training Accuracy= 0.938, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1700, Reward=-25.897335, Minibatch Loss= 0.6555, Training Accuracy= 0.867, Testing Acc= 0.561404, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1730, Reward=-12.193154, Minibatch Loss= 0.5174, Training Accuracy= 0.859, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1710, Reward=-19.473837, Minibatch Loss= 0.5732, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1740, Reward=-16.504234, Minibatch Loss= 0.5010, Training Accuracy= 0.914, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1720, Reward=-21.28642, Minibatch Loss= 0.5114, Training Accuracy= 0.914, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1750, Reward=-20.301722, Minibatch Loss= 0.5950, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1730, Reward=-16.05931, Minibatch Loss= 0.6313, Training Accuracy= 0.883, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1760, Reward=-12.447597, Minibatch Loss= 0.6517, Training Accuracy= 0.852, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1740, Reward=-18.675556, Minibatch Loss= 0.5904, Training Accuracy= 0.898, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1770, Reward=-23.054586, Minibatch Loss= 0.5631, Training Accuracy= 0.859, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1750, Reward=-22.317928, Minibatch Loss= 0.6318, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1780, Reward=-16.273897, Minibatch Loss= 0.4819, Training Accuracy= 0.914, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1760, Reward=-18.222944, Minibatch Loss= 0.7000, Training Accuracy= 0.852, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1790, Reward=-26.661802, Minibatch Loss= 0.6209, Training Accuracy= 0.859, Testing Acc= 0.666667, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1770, Reward=-21.634245, Minibatch Loss= 0.5709, Training Accuracy= 0.898, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1800, Reward=-31.32909, Minibatch Loss= 0.6138, Training Accuracy= 0.859, Testing Acc= 0.675439, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1780, Reward=-10.987028, Minibatch Loss= 0.4683, Training Accuracy= 0.922, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1810, Reward=-18.71298, Minibatch Loss= 0.6495, Training Accuracy= 0.891, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1790, Reward=-24.736115, Minibatch Loss= 0.5534, Training Accuracy= 0.867, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1820, Reward=-16.420319, Minibatch Loss= 0.4992, Training Accuracy= 0.898, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1800, Reward=-28.9086, Minibatch Loss= 0.5689, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1830, Reward=-19.38478, Minibatch Loss= 0.4625, Training Accuracy= 0.930, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1810, Reward=-14.248364, Minibatch Loss= 0.6301, Training Accuracy= 0.859, Testing Acc= 0.543860, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1840, Reward=-18.05414, Minibatch Loss= 0.6019, Training Accuracy= 0.828, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1820, Reward=-15.189964, Minibatch Loss= 0.5516, Training Accuracy= 0.891, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1850, Reward=-27.90935, Minibatch Loss= 0.6130, Training Accuracy= 0.883, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1830, Reward=-19.569887, Minibatch Loss= 0.4715, Training Accuracy= 0.922, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1860, Reward=-19.249321, Minibatch Loss= 0.7103, Training Accuracy= 0.883, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1840, Reward=-15.072964, Minibatch Loss= 0.5278, Training Accuracy= 0.852, Testing Acc= 0.596491, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1870, Reward=-19.913824, Minibatch Loss= 0.5135, Training Accuracy= 0.930, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1850, Reward=-25.274721, Minibatch Loss= 0.5166, Training Accuracy= 0.898, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1880, Reward=-12.106622, Minibatch Loss= 0.5624, Training Accuracy= 0.852, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1860, Reward=-19.953884, Minibatch Loss= 0.5138, Training Accuracy= 0.883, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1890, Reward=-16.06377, Minibatch Loss= 0.5938, Training Accuracy= 0.781, Testing Acc= 0.491228, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1870, Reward=-13.526361, Minibatch Loss= 0.4544, Training Accuracy= 0.914, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1900, Reward=-19.75888, Minibatch Loss= 0.4895, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1880, Reward=-9.146395, Minibatch Loss= 0.4487, Training Accuracy= 0.875, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1910, Reward=-17.348658, Minibatch Loss= 0.4969, Training Accuracy= 0.891, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1890, Reward=-17.451729, Minibatch Loss= 0.4793, Training Accuracy= 0.922, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1920, Reward=-16.951971, Minibatch Loss= 0.5634, Training Accuracy= 0.852, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1900, Reward=-18.197367, Minibatch Loss= 0.5376, Training Accuracy= 0.867, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1930, Reward=-22.509916, Minibatch Loss= 0.5705, Training Accuracy= 0.891, Testing Acc= 0.666667, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1940, Reward=-14.922651, Minibatch Loss= 0.5719, Training Accuracy= 0.906, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1910, Reward=-17.75483, Minibatch Loss= 0.6321, Training Accuracy= 0.867, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1950, Reward=-15.8584175, Minibatch Loss= 0.5740, Training Accuracy= 0.883, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1920, Reward=-23.303364, Minibatch Loss= 0.5742, Training Accuracy= 0.844, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1960, Reward=-11.223029, Minibatch Loss= 0.4756, Training Accuracy= 0.922, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1930, Reward=-21.172743, Minibatch Loss= 0.5267, Training Accuracy= 0.898, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1970, Reward=-19.893494, Minibatch Loss= 0.5193, Training Accuracy= 0.898, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1940, Reward=-14.190266, Minibatch Loss= 0.5100, Training Accuracy= 0.914, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1980, Reward=-18.411566, Minibatch Loss= 0.6392, Training Accuracy= 0.852, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1950, Reward=-14.101626, Minibatch Loss= 0.6996, Training Accuracy= 0.812, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 1990, Reward=-18.602987, Minibatch Loss= 0.5244, Training Accuracy= 0.922, Testing Acc= 0.745614, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1960, Reward=-19.405525, Minibatch Loss= 0.5466, Training Accuracy= 0.852, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2000, Reward=-21.87378, Minibatch Loss= 0.7102, Training Accuracy= 0.836, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1970, Reward=-24.976587, Minibatch Loss= 0.6986, Training Accuracy= 0.844, Testing Acc= 0.657895, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2010, Reward=-14.774494, Minibatch Loss= 0.5210, Training Accuracy= 0.914, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1980, Reward=-28.736826, Minibatch Loss= 0.8327, Training Accuracy= 0.820, Testing Acc= 0.631579, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2020, Reward=-17.952152, Minibatch Loss= 0.5549, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 1990, Reward=-26.074938, Minibatch Loss= 0.9333, Training Accuracy= 0.812, Testing Acc= 0.649123, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2030, Reward=-12.142948, Minibatch Loss= 0.4635, Training Accuracy= 0.914, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2000, Reward=-26.630846, Minibatch Loss= 0.8122, Training Accuracy= 0.867, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2040, Reward=-6.524378, Minibatch Loss= 0.5047, Training Accuracy= 0.891, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2010, Reward=-11.440598, Minibatch Loss= 0.6679, Training Accuracy= 0.891, Testing Acc= 0.596491, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2050, Reward=-13.604745, Minibatch Loss= 0.4490, Training Accuracy= 0.914, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2020, Reward=-27.199827, Minibatch Loss= 1.0330, Training Accuracy= 0.836, Testing Acc= 0.447368, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2060, Reward=-13.807181, Minibatch Loss= 0.4195, Training Accuracy= 0.906, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2030, Reward=-26.976376, Minibatch Loss= 1.1494, Training Accuracy= 0.727, Testing Acc= 0.429825, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2070, Reward=-5.970825, Minibatch Loss= 0.4753, Training Accuracy= 0.898, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2040, Reward=-22.238152, Minibatch Loss= 0.9585, Training Accuracy= 0.789, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2080, Reward=-12.177774, Minibatch Loss= 0.4127, Training Accuracy= 0.922, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2050, Reward=-26.059286, Minibatch Loss= 0.8167, Training Accuracy= 0.898, Testing Acc= 0.622807, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2090, Reward=-11.128807, Minibatch Loss= 0.4749, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2060, Reward=-21.48097, Minibatch Loss= 0.7980, Training Accuracy= 0.914, Testing Acc= 0.543860, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2100, Reward=-16.850832, Minibatch Loss= 0.4943, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2070, Reward=-13.214875, Minibatch Loss= 0.7055, Training Accuracy= 0.875, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2110, Reward=-17.855394, Minibatch Loss= 0.4347, Training Accuracy= 0.922, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2080, Reward=-13.6192665, Minibatch Loss= 0.7727, Training Accuracy= 0.812, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2120, Reward=-13.520453, Minibatch Loss= 0.3451, Training Accuracy= 0.930, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2090, Reward=-22.388645, Minibatch Loss= 0.7431, Training Accuracy= 0.789, Testing Acc= 0.622807, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2130, Reward=-11.900713, Minibatch Loss= 0.4118, Training Accuracy= 0.914, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2100, Reward=-23.06243, Minibatch Loss= 0.7512, Training Accuracy= 0.883, Testing Acc= 0.631579, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2140, Reward=-16.271446, Minibatch Loss= 0.4591, Training Accuracy= 0.883, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2110, Reward=-20.452023, Minibatch Loss= 0.7167, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2150, Reward=-14.426506, Minibatch Loss= 0.3786, Training Accuracy= 0.953, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2120, Reward=-17.424595, Minibatch Loss= 0.5705, Training Accuracy= 0.898, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2160, Reward=-12.429363, Minibatch Loss= 0.3430, Training Accuracy= 0.938, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2130, Reward=-12.583342, Minibatch Loss= 0.5383, Training Accuracy= 0.883, Testing Acc= 0.385965, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2170, Reward=-10.6640625, Minibatch Loss= 0.3851, Training Accuracy= 0.914, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2140, Reward=-18.60648, Minibatch Loss= 0.6797, Training Accuracy= 0.859, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2180, Reward=-19.042862, Minibatch Loss= 0.4418, Training Accuracy= 0.891, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2150, Reward=-17.755463, Minibatch Loss= 0.5299, Training Accuracy= 0.930, Testing Acc= 0.429825, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2190, Reward=-15.207759, Minibatch Loss= 0.6262, Training Accuracy= 0.891, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2160, Reward=-16.986982, Minibatch Loss= 0.4968, Training Accuracy= 0.945, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2200, Reward=-10.315114, Minibatch Loss= 0.4806, Training Accuracy= 0.914, Testing Acc= 0.710526, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2170, Reward=-10.353666, Minibatch Loss= 0.5030, Training Accuracy= 0.938, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2210, Reward=-14.755886, Minibatch Loss= 0.3995, Training Accuracy= 0.891, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2180, Reward=-15.059535, Minibatch Loss= 0.5108, Training Accuracy= 0.898, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2220, Reward=-3.4159317, Minibatch Loss= 0.3651, Training Accuracy= 0.930, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2190, Reward=-17.243198, Minibatch Loss= 0.7681, Training Accuracy= 0.891, Testing Acc= 0.622807, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2230, Reward=-19.134808, Minibatch Loss= 0.5883, Training Accuracy= 0.781, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2200, Reward=-10.779558, Minibatch Loss= 0.5344, Training Accuracy= 0.914, Testing Acc= 0.456140, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2240, Reward=-18.943653, Minibatch Loss= 0.5190, Training Accuracy= 0.867, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2210, Reward=-17.839264, Minibatch Loss= 0.4642, Training Accuracy= 0.914, Testing Acc= 0.447368, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2250, Reward=-18.39829, Minibatch Loss= 0.7619, Training Accuracy= 0.820, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2220, Reward=-4.9389486, Minibatch Loss= 0.3982, Training Accuracy= 0.938, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2260, Reward=-15.841121, Minibatch Loss= 0.4658, Training Accuracy= 0.930, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2230, Reward=-18.280708, Minibatch Loss= 0.5882, Training Accuracy= 0.773, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2270, Reward=-17.358727, Minibatch Loss= 0.4561, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2240, Reward=-21.194616, Minibatch Loss= 0.7597, Training Accuracy= 0.836, Testing Acc= 0.438596, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2280, Reward=-11.469955, Minibatch Loss= 0.6345, Training Accuracy= 0.891, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2250, Reward=-19.695656, Minibatch Loss= 0.8190, Training Accuracy= 0.844, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2290, Reward=-13.627115, Minibatch Loss= 0.6045, Training Accuracy= 0.883, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2260, Reward=-19.41595, Minibatch Loss= 0.5823, Training Accuracy= 0.914, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2300, Reward=-17.005365, Minibatch Loss= 0.5437, Training Accuracy= 0.953, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2270, Reward=-15.94126, Minibatch Loss= 0.5528, Training Accuracy= 0.906, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2310, Reward=-16.806814, Minibatch Loss= 0.5297, Training Accuracy= 0.898, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2320, Reward=-13.165607, Minibatch Loss= 0.3528, Training Accuracy= 0.977, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2280, Reward=-9.347557, Minibatch Loss= 0.4458, Training Accuracy= 0.906, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2330, Reward=-19.95625, Minibatch Loss= 0.5680, Training Accuracy= 0.883, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2290, Reward=-10.668294, Minibatch Loss= 0.5282, Training Accuracy= 0.867, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2340, Reward=-16.329115, Minibatch Loss= 0.5862, Training Accuracy= 0.844, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2300, Reward=-14.346405, Minibatch Loss= 0.4891, Training Accuracy= 0.883, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2310, Reward=-19.833189, Minibatch Loss= 0.3789, Training Accuracy= 0.906, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2350, Reward=-19.161903, Minibatch Loss= 0.4135, Training Accuracy= 0.953, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2360, Reward=-17.520876, Minibatch Loss= 0.4160, Training Accuracy= 0.898, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2320, Reward=-7.9065886, Minibatch Loss= 0.3861, Training Accuracy= 0.898, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2370, Reward=-10.034534, Minibatch Loss= 0.5018, Training Accuracy= 0.836, Testing Acc= 0.491228, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2330, Reward=-20.043861, Minibatch Loss= 0.4698, Training Accuracy= 0.875, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2380, Reward=-18.464966, Minibatch Loss= 0.4615, Training Accuracy= 0.945, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2340, Reward=-14.711562, Minibatch Loss= 0.4584, Training Accuracy= 0.906, Testing Acc= 0.543860, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2390, Reward=-15.724911, Minibatch Loss= 0.4547, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2350, Reward=-14.068648, Minibatch Loss= 0.4067, Training Accuracy= 0.938, Testing Acc= 0.596491, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2400, Reward=-15.461275, Minibatch Loss= 0.5602, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2360, Reward=-17.434866, Minibatch Loss= 0.4043, Training Accuracy= 0.906, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2410, Reward=-15.729576, Minibatch Loss= 0.4581, Training Accuracy= 0.914, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2370, Reward=-13.20982, Minibatch Loss= 0.5069, Training Accuracy= 0.883, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2420, Reward=-7.494232, Minibatch Loss= 0.3811, Training Accuracy= 0.922, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2380, Reward=-17.583317, Minibatch Loss= 0.5145, Training Accuracy= 0.938, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2430, Reward=-12.543366, Minibatch Loss= 0.4743, Training Accuracy= 0.906, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2390, Reward=-15.969873, Minibatch Loss= 0.5268, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2440, Reward=-11.363489, Minibatch Loss= 0.7778, Training Accuracy= 0.789, Testing Acc= 0.710526, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2400, Reward=-14.929198, Minibatch Loss= 0.5721, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2450, Reward=-17.429361, Minibatch Loss= 0.4967, Training Accuracy= 0.922, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2410, Reward=-22.286932, Minibatch Loss= 0.6115, Training Accuracy= 0.844, Testing Acc= 0.561404, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2460, Reward=-16.993906, Minibatch Loss= 0.5749, Training Accuracy= 0.883, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2420, Reward=-15.138709, Minibatch Loss= 0.4979, Training Accuracy= 0.906, Testing Acc= 0.464912, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2470, Reward=-20.365746, Minibatch Loss= 0.4652, Training Accuracy= 0.930, Testing Acc= 0.482456, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2430, Reward=-16.807455, Minibatch Loss= 0.6666, Training Accuracy= 0.805, Testing Acc= 0.421053, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2480, Reward=-7.9936266, Minibatch Loss= 0.4819, Training Accuracy= 0.891, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2440, Reward=-7.4224377, Minibatch Loss= 0.8945, Training Accuracy= 0.742, Testing Acc= 0.456140, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2490, Reward=-9.950111, Minibatch Loss= 0.3849, Training Accuracy= 0.914, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2450, Reward=-17.758547, Minibatch Loss= 0.5145, Training Accuracy= 0.930, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2500, Reward=-17.445522, Minibatch Loss= 0.4277, Training Accuracy= 0.922, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2460, Reward=-12.4450655, Minibatch Loss= 0.5333, Training Accuracy= 0.898, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2510, Reward=-15.550632, Minibatch Loss= 0.4932, Training Accuracy= 0.930, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2470, Reward=-12.826773, Minibatch Loss= 0.4096, Training Accuracy= 0.945, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2520, Reward=-18.936476, Minibatch Loss= 0.4963, Training Accuracy= 0.898, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2480, Reward=-6.838992, Minibatch Loss= 0.4016, Training Accuracy= 0.898, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2530, Reward=-19.58962, Minibatch Loss= 0.4837, Training Accuracy= 0.906, Testing Acc= 0.464912, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2490, Reward=-10.2780285, Minibatch Loss= 0.5085, Training Accuracy= 0.867, Testing Acc= 0.456140, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2540, Reward=-24.25304, Minibatch Loss= 0.5881, Training Accuracy= 0.852, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2500, Reward=-19.505543, Minibatch Loss= 0.4806, Training Accuracy= 0.898, Testing Acc= 0.456140, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2550, Reward=-14.868891, Minibatch Loss= 0.4802, Training Accuracy= 0.906, Testing Acc= 0.763158, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2510, Reward=-16.479992, Minibatch Loss= 0.4937, Training Accuracy= 0.883, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2560, Reward=-16.862663, Minibatch Loss= 0.4338, Training Accuracy= 0.930, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2520, Reward=-16.267818, Minibatch Loss= 0.4648, Training Accuracy= 0.891, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2570, Reward=-17.559286, Minibatch Loss= 0.5068, Training Accuracy= 0.867, Testing Acc= 0.710526, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2530, Reward=-14.13386, Minibatch Loss= 0.4135, Training Accuracy= 0.898, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2580, Reward=-14.382625, Minibatch Loss= 0.4168, Training Accuracy= 0.953, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2540, Reward=-18.393295, Minibatch Loss= 0.3763, Training Accuracy= 0.914, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2590, Reward=-15.268893, Minibatch Loss= 0.5460, Training Accuracy= 0.867, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2550, Reward=-14.63526, Minibatch Loss= 0.4036, Training Accuracy= 0.891, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2600, Reward=-19.411718, Minibatch Loss= 0.4623, Training Accuracy= 0.930, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2610, Reward=-2.564004, Minibatch Loss= 0.5259, Training Accuracy= 0.906, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2560, Reward=-14.443566, Minibatch Loss= 0.4328, Training Accuracy= 0.859, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2620, Reward=-14.495699, Minibatch Loss= 0.4639, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2570, Reward=-17.237185, Minibatch Loss= 0.3980, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2580, Reward=-17.889671, Minibatch Loss= 0.6861, Training Accuracy= 0.852, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2630, Reward=-15.072556, Minibatch Loss= 0.5088, Training Accuracy= 0.906, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2640, Reward=-13.799189, Minibatch Loss= 0.5155, Training Accuracy= 0.859, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2590, Reward=-32.03886, Minibatch Loss= 0.9064, Training Accuracy= 0.773, Testing Acc= 0.482456, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2650, Reward=-20.03669, Minibatch Loss= 0.5078, Training Accuracy= 0.875, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2600, Reward=-29.441975, Minibatch Loss= 0.8685, Training Accuracy= 0.797, Testing Acc= 0.456140, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2660, Reward=-12.334667, Minibatch Loss= 0.7130, Training Accuracy= 0.820, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2610, Reward=-15.833233, Minibatch Loss= 0.7398, Training Accuracy= 0.898, Testing Acc= 0.473684, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2670, Reward=-12.833427, Minibatch Loss= 0.5465, Training Accuracy= 0.836, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2620, Reward=-29.14162, Minibatch Loss= 0.6639, Training Accuracy= 0.906, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2680, Reward=-14.184695, Minibatch Loss= 0.8095, Training Accuracy= 0.867, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2630, Reward=-18.040638, Minibatch Loss= 0.6689, Training Accuracy= 0.852, Testing Acc= 0.464912, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2690, Reward=-11.350214, Minibatch Loss= 0.5341, Training Accuracy= 0.883, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2640, Reward=-18.952627, Minibatch Loss= 0.6143, Training Accuracy= 0.898, Testing Acc= 0.578947, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2700, Reward=-21.259346, Minibatch Loss= 0.4461, Training Accuracy= 0.922, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2710, Reward=-13.625023, Minibatch Loss= 0.4298, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2650, Reward=-19.90137, Minibatch Loss= 0.8673, Training Accuracy= 0.812, Testing Acc= 0.631579, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2720, Reward=-12.83958, Minibatch Loss= 0.4225, Training Accuracy= 0.891, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2660, Reward=-16.365294, Minibatch Loss= 0.8086, Training Accuracy= 0.828, Testing Acc= 0.640351, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2730, Reward=-9.821123, Minibatch Loss= 0.4977, Training Accuracy= 0.922, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2670, Reward=-16.632133, Minibatch Loss= 0.6274, Training Accuracy= 0.883, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2740, Reward=-7.8158836, Minibatch Loss= 0.4125, Training Accuracy= 0.945, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2680, Reward=-11.940291, Minibatch Loss= 0.5297, Training Accuracy= 0.922, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2750, Reward=-9.914669, Minibatch Loss= 0.4991, Training Accuracy= 0.883, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2690, Reward=-8.795389, Minibatch Loss= 0.4562, Training Accuracy= 0.891, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2760, Reward=-13.440517, Minibatch Loss= 0.3955, Training Accuracy= 0.922, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2700, Reward=-19.16175, Minibatch Loss= 0.4088, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2770, Reward=-21.066147, Minibatch Loss= 0.4609, Training Accuracy= 0.906, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2710, Reward=-14.965702, Minibatch Loss= 0.5849, Training Accuracy= 0.875, Testing Acc= 0.614035, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2780, Reward=-15.588387, Minibatch Loss= 0.4395, Training Accuracy= 0.875, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2720, Reward=-16.853306, Minibatch Loss= 0.5416, Training Accuracy= 0.891, Testing Acc= 0.614035, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2790, Reward=-11.651697, Minibatch Loss= 0.3771, Training Accuracy= 0.922, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2730, Reward=-11.939957, Minibatch Loss= 0.4497, Training Accuracy= 0.938, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2800, Reward=-13.626666, Minibatch Loss= 0.4466, Training Accuracy= 0.898, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2740, Reward=-7.3914127, Minibatch Loss= 0.3700, Training Accuracy= 0.930, Testing Acc= 0.640351, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2810, Reward=-18.040026, Minibatch Loss= 0.5853, Training Accuracy= 0.859, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2750, Reward=-9.807046, Minibatch Loss= 0.4921, Training Accuracy= 0.883, Testing Acc= 0.535088, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2820, Reward=-17.456568, Minibatch Loss= 0.4573, Training Accuracy= 0.930, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2760, Reward=-16.096445, Minibatch Loss= 0.4982, Training Accuracy= 0.859, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2830, Reward=-4.5627384, Minibatch Loss= 0.4171, Training Accuracy= 0.883, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2770, Reward=-18.97816, Minibatch Loss= 0.4780, Training Accuracy= 0.898, Testing Acc= 0.543860, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2840, Reward=-16.732452, Minibatch Loss= 0.4917, Training Accuracy= 0.891, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2780, Reward=-12.684324, Minibatch Loss= 0.4672, Training Accuracy= 0.891, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2850, Reward=-18.75532, Minibatch Loss= 0.4904, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2790, Reward=-8.948808, Minibatch Loss= 0.3842, Training Accuracy= 0.922, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2860, Reward=-12.655029, Minibatch Loss= 0.3810, Training Accuracy= 0.938, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2800, Reward=-9.918514, Minibatch Loss= 0.3937, Training Accuracy= 0.898, Testing Acc= 0.640351, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2870, Reward=-14.709146, Minibatch Loss= 0.3187, Training Accuracy= 0.945, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2810, Reward=-30.611366, Minibatch Loss= 0.7011, Training Accuracy= 0.812, Testing Acc= 0.666667, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2880, Reward=-11.637469, Minibatch Loss= 0.4094, Training Accuracy= 0.906, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2820, Reward=-27.70772, Minibatch Loss= 0.6648, Training Accuracy= 0.875, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2890, Reward=-12.549803, Minibatch Loss= 0.4136, Training Accuracy= 0.906, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2830, Reward=-19.569233, Minibatch Loss= 0.5894, Training Accuracy= 0.898, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2900, Reward=-16.960041, Minibatch Loss= 0.6222, Training Accuracy= 0.812, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2840, Reward=-21.175743, Minibatch Loss= 0.5320, Training Accuracy= 0.883, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2910, Reward=-17.36115, Minibatch Loss= 0.3825, Training Accuracy= 0.945, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2850, Reward=-23.910877, Minibatch Loss= 0.5818, Training Accuracy= 0.828, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2920, Reward=-8.518511, Minibatch Loss= 0.3770, Training Accuracy= 0.930, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2860, Reward=-16.281507, Minibatch Loss= 0.6145, Training Accuracy= 0.891, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2930, Reward=-12.624575, Minibatch Loss= 0.4244, Training Accuracy= 0.906, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2870, Reward=-18.484833, Minibatch Loss= 0.5131, Training Accuracy= 0.914, Testing Acc= 0.543860, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2940, Reward=-17.067791, Minibatch Loss= 0.3941, Training Accuracy= 0.906, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2880, Reward=-14.507949, Minibatch Loss= 0.5431, Training Accuracy= 0.930, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2950, Reward=-10.231223, Minibatch Loss= 0.3672, Training Accuracy= 0.961, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2890, Reward=-14.029543, Minibatch Loss= 0.6885, Training Accuracy= 0.859, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2960, Reward=-8.78891, Minibatch Loss= 0.4187, Training Accuracy= 0.906, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2900, Reward=-20.617094, Minibatch Loss= 0.6402, Training Accuracy= 0.852, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2970, Reward=-13.269599, Minibatch Loss= 0.5136, Training Accuracy= 0.875, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2910, Reward=-18.60903, Minibatch Loss= 0.4997, Training Accuracy= 0.883, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2980, Reward=-7.71811, Minibatch Loss= 0.3684, Training Accuracy= 0.945, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Step 2920, Reward=-13.000792, Minibatch Loss= 0.5272, Training Accuracy= 0.836, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2990, Reward=-7.3449535, Minibatch Loss= 0.2934, Training Accuracy= 0.961, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.863496, Max AP=  0.870348\n",
      "Optimization Finished!\n",
      "Testing Accuracy: 0.81578946\n",
      "Step 2930, Reward=-14.028984, Minibatch Loss= 0.4354, Training Accuracy= 0.906, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2940, Reward=-12.957996, Minibatch Loss= 0.3800, Training Accuracy= 0.930, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 2950, Reward=-13.109118, Minibatch Loss= 0.4121, Training Accuracy= 0.906, Testing Acc= 0.517544, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 10, Reward=-57.064373, Minibatch Loss= 1.0555, Training Accuracy= 0.609, Testing Acc= 0.649123, Max Final Accuracy=  0.649123, Max AUC=  0.612804, Max AP=  0.643418\n",
      "Step 2960, Reward=-11.925697, Minibatch Loss= 0.4518, Training Accuracy= 0.914, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 20, Reward=-56.51365, Minibatch Loss= 0.7431, Training Accuracy= 0.625, Testing Acc= 0.692982, Max Final Accuracy=  0.692982, Max AUC=  0.733764, Max AP=  0.767156\n",
      "Step 2970, Reward=-15.342776, Minibatch Loss= 0.4623, Training Accuracy= 0.922, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 30, Reward=-50.85817, Minibatch Loss= 0.6519, Training Accuracy= 0.773, Testing Acc= 0.526316, Max Final Accuracy=  0.692982, Max AUC=  0.733764, Max AP=  0.767156\n",
      "Step 2980, Reward=-11.784017, Minibatch Loss= 0.4337, Training Accuracy= 0.945, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 40, Reward=-36.77905, Minibatch Loss= 0.6764, Training Accuracy= 0.672, Testing Acc= 0.649123, Max Final Accuracy=  0.692982, Max AUC=  0.733764, Max AP=  0.767156\n",
      "Step 2990, Reward=-12.178314, Minibatch Loss= 0.4143, Training Accuracy= 0.898, Testing Acc= 0.526316, Max Final Accuracy=  0.798246, Max AUC=  0.861034, Max AP=  0.882235\n",
      "Step 50, Reward=-32.793488, Minibatch Loss= 0.5398, Training Accuracy= 0.844, Testing Acc= 0.526316, Max Final Accuracy=  0.692982, Max AUC=  0.733764, Max AP=  0.767156\n",
      "Optimization Finished!\n",
      "Testing Accuracy: 0.7982456\n",
      "Step 60, Reward=-33.50336, Minibatch Loss= 0.5847, Training Accuracy= 0.766, Testing Acc= 0.500000, Max Final Accuracy=  0.692982, Max AUC=  0.733764, Max AP=  0.767156\n",
      "Step 70, Reward=-24.594772, Minibatch Loss= 0.8232, Training Accuracy= 0.719, Testing Acc= 0.789474, Max Final Accuracy=  0.789474, Max AUC=  0.842875, Max AP=  0.881023\n",
      "Step 10, Reward=-59.63179, Minibatch Loss= 1.0539, Training Accuracy= 0.602, Testing Acc= 0.640351, Max Final Accuracy=  0.640351, Max AUC=  0.758695, Max AP=  0.819538\n",
      "Step 80, Reward=-32.721413, Minibatch Loss= 0.6370, Training Accuracy= 0.773, Testing Acc= 0.561404, Max Final Accuracy=  0.789474, Max AUC=  0.842875, Max AP=  0.881023\n",
      "Step 20, Reward=-54.90206, Minibatch Loss= 0.7183, Training Accuracy= 0.758, Testing Acc= 0.684211, Max Final Accuracy=  0.684211, Max AUC=  0.772392, Max AP=  0.779670\n",
      "Step 90, Reward=-32.191486, Minibatch Loss= 0.6056, Training Accuracy= 0.844, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.842875, Max AP=  0.881023\n",
      "Step 30, Reward=-56.622536, Minibatch Loss= 0.7529, Training Accuracy= 0.758, Testing Acc= 0.684211, Max Final Accuracy=  0.684211, Max AUC=  0.772392, Max AP=  0.779670\n",
      "Step 100, Reward=-29.023958, Minibatch Loss= 0.6362, Training Accuracy= 0.836, Testing Acc= 0.754386, Max Final Accuracy=  0.789474, Max AUC=  0.842875, Max AP=  0.881023\n",
      "Step 40, Reward=-43.47897, Minibatch Loss= 0.6454, Training Accuracy= 0.766, Testing Acc= 0.631579, Max Final Accuracy=  0.684211, Max AUC=  0.772392, Max AP=  0.779670\n",
      "Step 110, Reward=-19.160324, Minibatch Loss= 0.5504, Training Accuracy= 0.867, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.842875, Max AP=  0.881023\n",
      "Step 50, Reward=-34.543285, Minibatch Loss= 0.6983, Training Accuracy= 0.727, Testing Acc= 0.526316, Max Final Accuracy=  0.684211, Max AUC=  0.772392, Max AP=  0.779670\n",
      "Step 120, Reward=-31.514273, Minibatch Loss= 0.6816, Training Accuracy= 0.773, Testing Acc= 0.684211, Max Final Accuracy=  0.789474, Max AUC=  0.842875, Max AP=  0.881023\n",
      "Step 60, Reward=-32.21219, Minibatch Loss= 0.5674, Training Accuracy= 0.820, Testing Acc= 0.649123, Max Final Accuracy=  0.684211, Max AUC=  0.772392, Max AP=  0.779670\n",
      "Step 130, Reward=-26.283321, Minibatch Loss= 0.6358, Training Accuracy= 0.797, Testing Acc= 0.719298, Max Final Accuracy=  0.789474, Max AUC=  0.842875, Max AP=  0.881023\n",
      "Step 70, Reward=-29.102337, Minibatch Loss= 0.6187, Training Accuracy= 0.789, Testing Acc= 0.491228, Max Final Accuracy=  0.684211, Max AUC=  0.772392, Max AP=  0.779670\n",
      "Step 140, Reward=-25.027046, Minibatch Loss= 0.5633, Training Accuracy= 0.867, Testing Acc= 0.745614, Max Final Accuracy=  0.789474, Max AUC=  0.842875, Max AP=  0.881023\n",
      "Step 80, Reward=-30.675224, Minibatch Loss= 0.6186, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.684211, Max AUC=  0.772392, Max AP=  0.779670\n",
      "Step 150, Reward=-25.82272, Minibatch Loss= 0.6131, Training Accuracy= 0.844, Testing Acc= 0.763158, Max Final Accuracy=  0.789474, Max AUC=  0.842875, Max AP=  0.881023\n",
      "Step 90, Reward=-32.015114, Minibatch Loss= 0.7094, Training Accuracy= 0.781, Testing Acc= 0.771930, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 160, Reward=-34.612747, Minibatch Loss= 0.6503, Training Accuracy= 0.797, Testing Acc= 0.780702, Max Final Accuracy=  0.789474, Max AUC=  0.842875, Max AP=  0.881023\n",
      "Step 100, Reward=-23.785435, Minibatch Loss= 0.5687, Training Accuracy= 0.867, Testing Acc= 0.535088, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 170, Reward=-28.025356, Minibatch Loss= 0.6045, Training Accuracy= 0.844, Testing Acc= 0.798246, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 110, Reward=-27.281883, Minibatch Loss= 0.6474, Training Accuracy= 0.828, Testing Acc= 0.666667, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 180, Reward=-22.348267, Minibatch Loss= 0.5900, Training Accuracy= 0.852, Testing Acc= 0.798246, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 120, Reward=-28.489517, Minibatch Loss= 0.6669, Training Accuracy= 0.797, Testing Acc= 0.640351, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 190, Reward=-26.716948, Minibatch Loss= 0.5966, Training Accuracy= 0.836, Testing Acc= 0.771930, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 130, Reward=-26.275633, Minibatch Loss= 0.6682, Training Accuracy= 0.875, Testing Acc= 0.701754, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 200, Reward=-20.412249, Minibatch Loss= 0.6885, Training Accuracy= 0.805, Testing Acc= 0.578947, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 140, Reward=-17.007566, Minibatch Loss= 0.6514, Training Accuracy= 0.844, Testing Acc= 0.684211, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 210, Reward=-22.036734, Minibatch Loss= 0.6320, Training Accuracy= 0.891, Testing Acc= 0.736842, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 150, Reward=-25.601318, Minibatch Loss= 0.5784, Training Accuracy= 0.883, Testing Acc= 0.675439, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 220, Reward=-26.968367, Minibatch Loss= 0.6550, Training Accuracy= 0.867, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 160, Reward=-30.011198, Minibatch Loss= 0.5858, Training Accuracy= 0.852, Testing Acc= 0.570175, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 230, Reward=-25.733757, Minibatch Loss= 0.6485, Training Accuracy= 0.766, Testing Acc= 0.596491, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 170, Reward=-27.8646, Minibatch Loss= 0.5446, Training Accuracy= 0.867, Testing Acc= 0.640351, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 240, Reward=-24.13649, Minibatch Loss= 0.7247, Training Accuracy= 0.758, Testing Acc= 0.649123, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 180, Reward=-20.842518, Minibatch Loss= 0.7004, Training Accuracy= 0.742, Testing Acc= 0.649123, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 250, Reward=-35.748425, Minibatch Loss= 0.7261, Training Accuracy= 0.781, Testing Acc= 0.578947, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 190, Reward=-32.205368, Minibatch Loss= 0.6055, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 260, Reward=-24.061716, Minibatch Loss= 0.6697, Training Accuracy= 0.852, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 270, Reward=-27.86782, Minibatch Loss= 0.6124, Training Accuracy= 0.859, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 200, Reward=-29.348091, Minibatch Loss= 0.7102, Training Accuracy= 0.805, Testing Acc= 0.587719, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 210, Reward=-25.902264, Minibatch Loss= 0.7588, Training Accuracy= 0.828, Testing Acc= 0.684211, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 280, Reward=-25.653873, Minibatch Loss= 0.5463, Training Accuracy= 0.867, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 290, Reward=-20.270641, Minibatch Loss= 0.6572, Training Accuracy= 0.820, Testing Acc= 0.508772, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 220, Reward=-25.433971, Minibatch Loss= 0.5375, Training Accuracy= 0.875, Testing Acc= 0.578947, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 230, Reward=-9.067308, Minibatch Loss= 0.6295, Training Accuracy= 0.852, Testing Acc= 0.719298, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 300, Reward=-26.538336, Minibatch Loss= 0.8114, Training Accuracy= 0.766, Testing Acc= 0.745614, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 240, Reward=-27.914713, Minibatch Loss= 0.6304, Training Accuracy= 0.906, Testing Acc= 0.631579, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 310, Reward=-22.491955, Minibatch Loss= 0.5714, Training Accuracy= 0.883, Testing Acc= 0.745614, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 320, Reward=-29.622234, Minibatch Loss= 0.5412, Training Accuracy= 0.883, Testing Acc= 0.631579, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 250, Reward=-26.989195, Minibatch Loss= 0.5997, Training Accuracy= 0.867, Testing Acc= 0.543860, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 260, Reward=-20.737516, Minibatch Loss= 0.6185, Training Accuracy= 0.852, Testing Acc= 0.552632, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 330, Reward=-21.888319, Minibatch Loss= 0.6111, Training Accuracy= 0.805, Testing Acc= 0.491228, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 340, Reward=-30.8023, Minibatch Loss= 0.5381, Training Accuracy= 0.867, Testing Acc= 0.578947, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 270, Reward=-26.901356, Minibatch Loss= 0.8729, Training Accuracy= 0.859, Testing Acc= 0.631579, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 350, Reward=-19.891844, Minibatch Loss= 0.5227, Training Accuracy= 0.938, Testing Acc= 0.596491, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 280, Reward=-24.364662, Minibatch Loss= 0.7829, Training Accuracy= 0.812, Testing Acc= 0.622807, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 290, Reward=-23.607468, Minibatch Loss= 0.6790, Training Accuracy= 0.867, Testing Acc= 0.561404, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 360, Reward=-19.472643, Minibatch Loss= 0.5880, Training Accuracy= 0.852, Testing Acc= 0.570175, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 370, Reward=-28.98362, Minibatch Loss= 0.5352, Training Accuracy= 0.883, Testing Acc= 0.605263, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 300, Reward=-22.166702, Minibatch Loss= 0.6728, Training Accuracy= 0.852, Testing Acc= 0.745614, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 380, Reward=-10.827362, Minibatch Loss= 0.5202, Training Accuracy= 0.883, Testing Acc= 0.622807, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 310, Reward=-23.270792, Minibatch Loss= 0.6623, Training Accuracy= 0.859, Testing Acc= 0.701754, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 390, Reward=-15.650311, Minibatch Loss= 0.5926, Training Accuracy= 0.836, Testing Acc= 0.745614, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 320, Reward=-25.499428, Minibatch Loss= 0.6405, Training Accuracy= 0.898, Testing Acc= 0.535088, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 400, Reward=-20.585161, Minibatch Loss= 0.6128, Training Accuracy= 0.883, Testing Acc= 0.657895, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 330, Reward=-30.119743, Minibatch Loss= 0.6390, Training Accuracy= 0.852, Testing Acc= 0.535088, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 410, Reward=-24.445986, Minibatch Loss= 0.8483, Training Accuracy= 0.789, Testing Acc= 0.649123, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 340, Reward=-22.529757, Minibatch Loss= 0.5684, Training Accuracy= 0.891, Testing Acc= 0.535088, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 420, Reward=-33.30139, Minibatch Loss= 0.6766, Training Accuracy= 0.836, Testing Acc= 0.666667, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 350, Reward=-21.522293, Minibatch Loss= 0.6787, Training Accuracy= 0.859, Testing Acc= 0.543860, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 430, Reward=-28.565466, Minibatch Loss= 0.7061, Training Accuracy= 0.836, Testing Acc= 0.552632, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 360, Reward=-26.098944, Minibatch Loss= 0.6852, Training Accuracy= 0.805, Testing Acc= 0.596491, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 440, Reward=-23.505192, Minibatch Loss= 0.6083, Training Accuracy= 0.875, Testing Acc= 0.640351, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 370, Reward=-27.826824, Minibatch Loss= 0.6717, Training Accuracy= 0.859, Testing Acc= 0.552632, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 450, Reward=-22.425343, Minibatch Loss= 0.6620, Training Accuracy= 0.844, Testing Acc= 0.798246, Max Final Accuracy=  0.798246, Max AUC=  0.869344, Max AP=  0.875841\n",
      "Step 380, Reward=-25.268595, Minibatch Loss= 0.5851, Training Accuracy= 0.891, Testing Acc= 0.640351, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 460, Reward=-26.977423, Minibatch Loss= 0.6712, Training Accuracy= 0.805, Testing Acc= 0.807018, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 390, Reward=-16.588314, Minibatch Loss= 0.5805, Training Accuracy= 0.867, Testing Acc= 0.631579, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 470, Reward=-12.02379, Minibatch Loss= 0.6505, Training Accuracy= 0.852, Testing Acc= 0.719298, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 400, Reward=-19.133846, Minibatch Loss= 0.6361, Training Accuracy= 0.828, Testing Acc= 0.570175, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 480, Reward=-31.5106, Minibatch Loss= 0.6976, Training Accuracy= 0.836, Testing Acc= 0.798246, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 410, Reward=-17.01268, Minibatch Loss= 0.5697, Training Accuracy= 0.898, Testing Acc= 0.631579, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 490, Reward=-22.883633, Minibatch Loss= 0.5633, Training Accuracy= 0.922, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 420, Reward=-27.387096, Minibatch Loss= 0.6073, Training Accuracy= 0.867, Testing Acc= 0.605263, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 500, Reward=-29.41941, Minibatch Loss= 0.7807, Training Accuracy= 0.820, Testing Acc= 0.771930, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 430, Reward=-24.31334, Minibatch Loss= 0.7986, Training Accuracy= 0.820, Testing Acc= 0.605263, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 510, Reward=-25.00979, Minibatch Loss= 0.6597, Training Accuracy= 0.852, Testing Acc= 0.535088, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 440, Reward=-24.277462, Minibatch Loss= 0.6244, Training Accuracy= 0.898, Testing Acc= 0.578947, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 520, Reward=-24.764025, Minibatch Loss= 0.6375, Training Accuracy= 0.883, Testing Acc= 0.640351, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 450, Reward=-19.0061, Minibatch Loss= 0.5986, Training Accuracy= 0.906, Testing Acc= 0.578947, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 530, Reward=-24.674255, Minibatch Loss= 0.5739, Training Accuracy= 0.883, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 460, Reward=-20.200096, Minibatch Loss= 0.5930, Training Accuracy= 0.859, Testing Acc= 0.657895, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 540, Reward=-14.703705, Minibatch Loss= 0.5173, Training Accuracy= 0.922, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 470, Reward=-22.623322, Minibatch Loss= 0.5800, Training Accuracy= 0.891, Testing Acc= 0.640351, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 550, Reward=-18.659569, Minibatch Loss= 0.6171, Training Accuracy= 0.812, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 480, Reward=-19.573263, Minibatch Loss= 0.4987, Training Accuracy= 0.906, Testing Acc= 0.596491, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 560, Reward=-26.78299, Minibatch Loss= 0.5826, Training Accuracy= 0.875, Testing Acc= 0.622807, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 490, Reward=-18.72916, Minibatch Loss= 0.5068, Training Accuracy= 0.906, Testing Acc= 0.596491, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 570, Reward=-23.653294, Minibatch Loss= 0.5644, Training Accuracy= 0.898, Testing Acc= 0.710526, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 500, Reward=-14.065473, Minibatch Loss= 0.5350, Training Accuracy= 0.867, Testing Acc= 0.640351, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 580, Reward=-24.96698, Minibatch Loss= 0.5956, Training Accuracy= 0.867, Testing Acc= 0.622807, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 510, Reward=-18.555511, Minibatch Loss= 0.5737, Training Accuracy= 0.906, Testing Acc= 0.692982, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 590, Reward=-17.874943, Minibatch Loss= 0.5707, Training Accuracy= 0.914, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 520, Reward=-14.834372, Minibatch Loss= 0.6257, Training Accuracy= 0.805, Testing Acc= 0.605263, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 600, Reward=-20.048624, Minibatch Loss= 0.7054, Training Accuracy= 0.844, Testing Acc= 0.666667, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 530, Reward=-24.773031, Minibatch Loss= 0.6185, Training Accuracy= 0.836, Testing Acc= 0.614035, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 610, Reward=-24.37931, Minibatch Loss= 0.5652, Training Accuracy= 0.898, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 540, Reward=-23.750835, Minibatch Loss= 0.5311, Training Accuracy= 0.875, Testing Acc= 0.587719, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 620, Reward=-21.327473, Minibatch Loss= 0.6505, Training Accuracy= 0.867, Testing Acc= 0.552632, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 550, Reward=-20.814396, Minibatch Loss= 0.6309, Training Accuracy= 0.836, Testing Acc= 0.543860, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 630, Reward=-20.668169, Minibatch Loss= 0.6434, Training Accuracy= 0.859, Testing Acc= 0.491228, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 640, Reward=-25.936018, Minibatch Loss= 0.5248, Training Accuracy= 0.906, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 560, Reward=-22.167168, Minibatch Loss= 0.5627, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 650, Reward=-15.704012, Minibatch Loss= 0.5824, Training Accuracy= 0.883, Testing Acc= 0.728070, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 570, Reward=-21.914032, Minibatch Loss= 0.4723, Training Accuracy= 0.930, Testing Acc= 0.614035, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 660, Reward=-17.612415, Minibatch Loss= 0.5449, Training Accuracy= 0.883, Testing Acc= 0.780702, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 580, Reward=-19.567657, Minibatch Loss= 0.5019, Training Accuracy= 0.891, Testing Acc= 0.622807, Max Final Accuracy=  0.771930, Max AUC=  0.863189, Max AP=  0.891350\n",
      "Step 670, Reward=-29.699379, Minibatch Loss= 0.7129, Training Accuracy= 0.867, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 590, Reward=-23.85655, Minibatch Loss= 0.4490, Training Accuracy= 0.938, Testing Acc= 0.789474, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 680, Reward=-27.396965, Minibatch Loss= 0.7045, Training Accuracy= 0.891, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 600, Reward=-18.898975, Minibatch Loss= 0.6704, Training Accuracy= 0.820, Testing Acc= 0.754386, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 690, Reward=-29.745787, Minibatch Loss= 0.5779, Training Accuracy= 0.867, Testing Acc= 0.649123, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 610, Reward=-21.840328, Minibatch Loss= 0.5651, Training Accuracy= 0.852, Testing Acc= 0.578947, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 700, Reward=-27.529442, Minibatch Loss= 0.8027, Training Accuracy= 0.828, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 620, Reward=-21.959726, Minibatch Loss= 0.5711, Training Accuracy= 0.891, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 710, Reward=-27.214647, Minibatch Loss= 0.5929, Training Accuracy= 0.883, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 630, Reward=-18.510426, Minibatch Loss= 0.5039, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 720, Reward=-17.727571, Minibatch Loss= 0.5834, Training Accuracy= 0.875, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 640, Reward=-25.54781, Minibatch Loss= 0.5086, Training Accuracy= 0.938, Testing Acc= 0.535088, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 730, Reward=-19.30495, Minibatch Loss= 0.4780, Training Accuracy= 0.930, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 740, Reward=-18.824057, Minibatch Loss= 0.5819, Training Accuracy= 0.898, Testing Acc= 0.491228, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 650, Reward=-20.022215, Minibatch Loss= 0.5788, Training Accuracy= 0.828, Testing Acc= 0.596491, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 750, Reward=-24.961367, Minibatch Loss= 0.4695, Training Accuracy= 0.906, Testing Acc= 0.491228, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 660, Reward=-21.66337, Minibatch Loss= 0.5927, Training Accuracy= 0.875, Testing Acc= 0.684211, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 670, Reward=-19.363102, Minibatch Loss= 0.6095, Training Accuracy= 0.883, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 760, Reward=-22.089302, Minibatch Loss= 0.5436, Training Accuracy= 0.875, Testing Acc= 0.517544, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 680, Reward=-22.049421, Minibatch Loss= 0.5375, Training Accuracy= 0.914, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 770, Reward=-20.956423, Minibatch Loss= 0.6028, Training Accuracy= 0.859, Testing Acc= 0.491228, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 690, Reward=-32.056866, Minibatch Loss= 0.6836, Training Accuracy= 0.844, Testing Acc= 0.535088, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 780, Reward=-15.503423, Minibatch Loss= 0.5769, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 790, Reward=-17.397089, Minibatch Loss= 0.5655, Training Accuracy= 0.867, Testing Acc= 0.649123, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 700, Reward=-21.02642, Minibatch Loss= 0.6463, Training Accuracy= 0.852, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 800, Reward=-19.461498, Minibatch Loss= 0.5354, Training Accuracy= 0.922, Testing Acc= 0.535088, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 710, Reward=-21.350163, Minibatch Loss= 0.5639, Training Accuracy= 0.922, Testing Acc= 0.570175, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 810, Reward=-16.744259, Minibatch Loss= 0.8491, Training Accuracy= 0.805, Testing Acc= 0.692982, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 720, Reward=-26.594524, Minibatch Loss= 0.5820, Training Accuracy= 0.852, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 820, Reward=-26.738981, Minibatch Loss= 0.6386, Training Accuracy= 0.859, Testing Acc= 0.631579, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 730, Reward=-18.143503, Minibatch Loss= 0.7023, Training Accuracy= 0.883, Testing Acc= 0.596491, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 830, Reward=-19.85575, Minibatch Loss= 0.7775, Training Accuracy= 0.836, Testing Acc= 0.701754, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 740, Reward=-22.235065, Minibatch Loss= 0.8027, Training Accuracy= 0.750, Testing Acc= 0.745614, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 840, Reward=-21.915154, Minibatch Loss= 0.6792, Training Accuracy= 0.859, Testing Acc= 0.657895, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 750, Reward=-29.944416, Minibatch Loss= 0.7438, Training Accuracy= 0.844, Testing Acc= 0.657895, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 850, Reward=-22.491951, Minibatch Loss= 0.7525, Training Accuracy= 0.805, Testing Acc= 0.789474, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 760, Reward=-27.160477, Minibatch Loss= 0.8058, Training Accuracy= 0.820, Testing Acc= 0.570175, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 860, Reward=-22.802536, Minibatch Loss= 0.8356, Training Accuracy= 0.828, Testing Acc= 0.684211, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 770, Reward=-27.27402, Minibatch Loss= 0.6924, Training Accuracy= 0.852, Testing Acc= 0.657895, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 870, Reward=-27.620518, Minibatch Loss= 0.6840, Training Accuracy= 0.906, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 780, Reward=-24.40276, Minibatch Loss= 0.8812, Training Accuracy= 0.750, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 880, Reward=-19.781769, Minibatch Loss= 0.8447, Training Accuracy= 0.781, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 790, Reward=-28.529064, Minibatch Loss= 0.7484, Training Accuracy= 0.828, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 890, Reward=-26.203243, Minibatch Loss= 0.7156, Training Accuracy= 0.859, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 800, Reward=-27.080217, Minibatch Loss= 0.8046, Training Accuracy= 0.820, Testing Acc= 0.719298, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 900, Reward=-22.430511, Minibatch Loss= 0.7353, Training Accuracy= 0.812, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 810, Reward=-22.831373, Minibatch Loss= 0.6915, Training Accuracy= 0.914, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 910, Reward=-24.521515, Minibatch Loss= 0.6736, Training Accuracy= 0.828, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 820, Reward=-17.05727, Minibatch Loss= 0.7453, Training Accuracy= 0.781, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 920, Reward=-29.997166, Minibatch Loss= 0.7539, Training Accuracy= 0.812, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 830, Reward=-28.139315, Minibatch Loss= 0.6763, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 930, Reward=-32.464973, Minibatch Loss= 0.8849, Training Accuracy= 0.758, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 840, Reward=-31.029076, Minibatch Loss= 0.8891, Training Accuracy= 0.812, Testing Acc= 0.596491, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 940, Reward=-32.490574, Minibatch Loss= 0.7813, Training Accuracy= 0.891, Testing Acc= 0.692982, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 850, Reward=-33.0232, Minibatch Loss= 1.0500, Training Accuracy= 0.750, Testing Acc= 0.578947, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 950, Reward=-14.25426, Minibatch Loss= 0.7468, Training Accuracy= 0.859, Testing Acc= 0.692982, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 860, Reward=-14.661776, Minibatch Loss= 0.9216, Training Accuracy= 0.859, Testing Acc= 0.622807, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 960, Reward=-17.449728, Minibatch Loss= 0.6345, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 870, Reward=-19.044722, Minibatch Loss= 0.8060, Training Accuracy= 0.859, Testing Acc= 0.570175, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 970, Reward=-19.623343, Minibatch Loss= 0.5779, Training Accuracy= 0.891, Testing Acc= 0.552632, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 880, Reward=-16.172222, Minibatch Loss= 0.6407, Training Accuracy= 0.883, Testing Acc= 0.429825, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 980, Reward=-18.145704, Minibatch Loss= 0.5411, Training Accuracy= 0.883, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 890, Reward=-24.951206, Minibatch Loss= 0.6798, Training Accuracy= 0.836, Testing Acc= 0.482456, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 990, Reward=-19.13795, Minibatch Loss= 0.4810, Training Accuracy= 0.938, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 900, Reward=-31.830654, Minibatch Loss= 1.1747, Training Accuracy= 0.719, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1000, Reward=-21.974874, Minibatch Loss= 0.6357, Training Accuracy= 0.844, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 910, Reward=-32.312202, Minibatch Loss= 1.0778, Training Accuracy= 0.797, Testing Acc= 0.438596, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1010, Reward=-20.889753, Minibatch Loss= 0.6002, Training Accuracy= 0.898, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 920, Reward=-29.0079, Minibatch Loss= 0.8464, Training Accuracy= 0.875, Testing Acc= 0.429825, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1020, Reward=-16.126402, Minibatch Loss= 0.6465, Training Accuracy= 0.859, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 930, Reward=-24.066084, Minibatch Loss= 0.6961, Training Accuracy= 0.914, Testing Acc= 0.447368, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1030, Reward=-20.020708, Minibatch Loss= 0.5774, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 940, Reward=-15.57193, Minibatch Loss= 0.7673, Training Accuracy= 0.820, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1040, Reward=-17.087566, Minibatch Loss= 0.4991, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 950, Reward=-22.573536, Minibatch Loss= 0.6287, Training Accuracy= 0.883, Testing Acc= 0.684211, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1050, Reward=-13.205899, Minibatch Loss= 0.5197, Training Accuracy= 0.867, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 960, Reward=-22.09689, Minibatch Loss= 0.6745, Training Accuracy= 0.875, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1060, Reward=-18.294434, Minibatch Loss= 0.6487, Training Accuracy= 0.805, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 970, Reward=-18.263908, Minibatch Loss= 0.6226, Training Accuracy= 0.836, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1070, Reward=-27.12547, Minibatch Loss= 0.4707, Training Accuracy= 0.922, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 980, Reward=-19.99398, Minibatch Loss= 0.6138, Training Accuracy= 0.844, Testing Acc= 0.684211, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1080, Reward=-21.753868, Minibatch Loss= 0.6223, Training Accuracy= 0.812, Testing Acc= 0.552632, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1090, Reward=-28.109797, Minibatch Loss= 0.6684, Training Accuracy= 0.789, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 990, Reward=-18.946405, Minibatch Loss= 0.6141, Training Accuracy= 0.820, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1000, Reward=-16.696697, Minibatch Loss= 0.5112, Training Accuracy= 0.906, Testing Acc= 0.561404, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1100, Reward=-27.510675, Minibatch Loss= 0.7146, Training Accuracy= 0.844, Testing Acc= 0.552632, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1110, Reward=-28.583117, Minibatch Loss= 0.6883, Training Accuracy= 0.883, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1010, Reward=-15.442842, Minibatch Loss= 0.4685, Training Accuracy= 0.891, Testing Acc= 0.675439, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1120, Reward=-23.77324, Minibatch Loss= 0.6532, Training Accuracy= 0.820, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1020, Reward=-19.897797, Minibatch Loss= 0.6110, Training Accuracy= 0.828, Testing Acc= 0.561404, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1130, Reward=-20.390007, Minibatch Loss= 0.7038, Training Accuracy= 0.852, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1030, Reward=-22.762047, Minibatch Loss= 0.4993, Training Accuracy= 0.891, Testing Acc= 0.719298, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1140, Reward=-20.314556, Minibatch Loss= 0.7281, Training Accuracy= 0.781, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1040, Reward=-18.492641, Minibatch Loss= 0.5055, Training Accuracy= 0.883, Testing Acc= 0.754386, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1150, Reward=-33.335693, Minibatch Loss= 0.8443, Training Accuracy= 0.758, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1050, Reward=-23.59622, Minibatch Loss= 0.6121, Training Accuracy= 0.852, Testing Acc= 0.649123, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1160, Reward=-22.555567, Minibatch Loss= 0.6954, Training Accuracy= 0.867, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1060, Reward=-18.131857, Minibatch Loss= 0.5985, Training Accuracy= 0.805, Testing Acc= 0.587719, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1170, Reward=-15.923868, Minibatch Loss= 0.7309, Training Accuracy= 0.875, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1070, Reward=-25.164925, Minibatch Loss= 0.5946, Training Accuracy= 0.914, Testing Acc= 0.535088, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1180, Reward=-29.632544, Minibatch Loss= 0.6265, Training Accuracy= 0.867, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1080, Reward=-24.81358, Minibatch Loss= 0.6517, Training Accuracy= 0.836, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1190, Reward=-20.946064, Minibatch Loss= 0.6682, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1090, Reward=-25.173903, Minibatch Loss= 0.5168, Training Accuracy= 0.914, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1200, Reward=-22.929604, Minibatch Loss= 0.6144, Training Accuracy= 0.906, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1100, Reward=-9.059665, Minibatch Loss= 0.6105, Training Accuracy= 0.875, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1210, Reward=-31.999947, Minibatch Loss= 0.7358, Training Accuracy= 0.812, Testing Acc= 0.728070, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1110, Reward=-20.147232, Minibatch Loss= 0.5466, Training Accuracy= 0.891, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1220, Reward=-26.268274, Minibatch Loss= 0.7433, Training Accuracy= 0.836, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1120, Reward=-15.381453, Minibatch Loss= 0.7113, Training Accuracy= 0.750, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1230, Reward=-15.549871, Minibatch Loss= 0.7407, Training Accuracy= 0.852, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1130, Reward=-16.87926, Minibatch Loss= 0.5423, Training Accuracy= 0.891, Testing Acc= 0.543860, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1240, Reward=-27.906208, Minibatch Loss= 0.6159, Training Accuracy= 0.844, Testing Acc= 0.710526, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1140, Reward=-20.050104, Minibatch Loss= 0.7389, Training Accuracy= 0.820, Testing Acc= 0.456140, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1250, Reward=-21.379528, Minibatch Loss= 0.5431, Training Accuracy= 0.891, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1150, Reward=-19.952263, Minibatch Loss= 0.5459, Training Accuracy= 0.891, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1260, Reward=-20.841625, Minibatch Loss= 0.5446, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1160, Reward=-16.856962, Minibatch Loss= 0.5818, Training Accuracy= 0.867, Testing Acc= 0.666667, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1270, Reward=-28.006987, Minibatch Loss= 0.7090, Training Accuracy= 0.828, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1170, Reward=-22.697947, Minibatch Loss= 0.5770, Training Accuracy= 0.852, Testing Acc= 0.649123, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1280, Reward=-21.794758, Minibatch Loss= 0.7328, Training Accuracy= 0.891, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1180, Reward=-29.22595, Minibatch Loss= 0.6159, Training Accuracy= 0.836, Testing Acc= 0.491228, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1290, Reward=-21.41171, Minibatch Loss= 0.7639, Training Accuracy= 0.883, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1190, Reward=-19.97616, Minibatch Loss= 0.7571, Training Accuracy= 0.844, Testing Acc= 0.500000, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1300, Reward=-16.186403, Minibatch Loss= 0.8481, Training Accuracy= 0.828, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1200, Reward=-20.285706, Minibatch Loss= 0.7329, Training Accuracy= 0.836, Testing Acc= 0.649123, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1310, Reward=-21.062325, Minibatch Loss= 0.8099, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1210, Reward=-27.624636, Minibatch Loss= 0.7307, Training Accuracy= 0.758, Testing Acc= 0.447368, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1320, Reward=-15.130625, Minibatch Loss= 0.8507, Training Accuracy= 0.789, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1220, Reward=-28.011017, Minibatch Loss= 0.6299, Training Accuracy= 0.891, Testing Acc= 0.456140, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1330, Reward=-25.835407, Minibatch Loss= 0.7292, Training Accuracy= 0.836, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1230, Reward=-16.448101, Minibatch Loss= 0.6130, Training Accuracy= 0.859, Testing Acc= 0.456140, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1340, Reward=-14.514253, Minibatch Loss= 0.7437, Training Accuracy= 0.836, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1240, Reward=-18.627792, Minibatch Loss= 0.5042, Training Accuracy= 0.938, Testing Acc= 0.421053, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1350, Reward=-19.25513, Minibatch Loss= 0.6078, Training Accuracy= 0.930, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1360, Reward=-13.324773, Minibatch Loss= 0.6403, Training Accuracy= 0.852, Testing Acc= 0.535088, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1250, Reward=-14.963102, Minibatch Loss= 0.5106, Training Accuracy= 0.867, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1370, Reward=-18.780245, Minibatch Loss= 0.7392, Training Accuracy= 0.844, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1260, Reward=-24.03389, Minibatch Loss= 0.8656, Training Accuracy= 0.766, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1380, Reward=-27.361906, Minibatch Loss= 0.7591, Training Accuracy= 0.852, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1270, Reward=-26.13908, Minibatch Loss= 0.8781, Training Accuracy= 0.797, Testing Acc= 0.578947, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1390, Reward=-32.724564, Minibatch Loss= 0.7284, Training Accuracy= 0.867, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1280, Reward=-26.24143, Minibatch Loss= 0.7133, Training Accuracy= 0.875, Testing Acc= 0.535088, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1400, Reward=-22.737629, Minibatch Loss= 0.6702, Training Accuracy= 0.844, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1290, Reward=-19.495596, Minibatch Loss= 0.6748, Training Accuracy= 0.867, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1410, Reward=-19.07411, Minibatch Loss= 0.4941, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1300, Reward=-23.052134, Minibatch Loss= 0.6471, Training Accuracy= 0.828, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1420, Reward=-12.2835045, Minibatch Loss= 0.4910, Training Accuracy= 0.930, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1310, Reward=-20.748846, Minibatch Loss= 0.6808, Training Accuracy= 0.867, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1430, Reward=-10.391737, Minibatch Loss= 0.5276, Training Accuracy= 0.898, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1320, Reward=-23.543947, Minibatch Loss= 0.7475, Training Accuracy= 0.820, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1440, Reward=-18.858707, Minibatch Loss= 0.5780, Training Accuracy= 0.852, Testing Acc= 0.517544, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1330, Reward=-29.6663, Minibatch Loss= 0.7766, Training Accuracy= 0.859, Testing Acc= 0.701754, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1450, Reward=-21.722067, Minibatch Loss= 0.6247, Training Accuracy= 0.898, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1340, Reward=-25.02372, Minibatch Loss= 0.8641, Training Accuracy= 0.812, Testing Acc= 0.701754, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1460, Reward=-15.158821, Minibatch Loss= 0.4422, Training Accuracy= 0.914, Testing Acc= 0.535088, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1350, Reward=-32.748665, Minibatch Loss= 0.6985, Training Accuracy= 0.875, Testing Acc= 0.763158, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1470, Reward=-14.017167, Minibatch Loss= 0.4495, Training Accuracy= 0.883, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1360, Reward=-20.794594, Minibatch Loss= 0.6476, Training Accuracy= 0.906, Testing Acc= 0.701754, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1480, Reward=-17.160763, Minibatch Loss= 0.5791, Training Accuracy= 0.828, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1370, Reward=-15.852871, Minibatch Loss= 0.7726, Training Accuracy= 0.820, Testing Acc= 0.771930, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1490, Reward=-19.974888, Minibatch Loss= 0.6009, Training Accuracy= 0.859, Testing Acc= 0.517544, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1380, Reward=-24.799871, Minibatch Loss= 0.9607, Training Accuracy= 0.695, Testing Acc= 0.701754, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1500, Reward=-11.713733, Minibatch Loss= 0.5019, Training Accuracy= 0.844, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1390, Reward=-30.81984, Minibatch Loss= 0.6753, Training Accuracy= 0.914, Testing Acc= 0.754386, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1510, Reward=-16.649387, Minibatch Loss= 0.4529, Training Accuracy= 0.906, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1400, Reward=-20.431831, Minibatch Loss= 0.6562, Training Accuracy= 0.852, Testing Acc= 0.780702, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1520, Reward=-13.383269, Minibatch Loss= 0.4168, Training Accuracy= 0.914, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1410, Reward=-20.879238, Minibatch Loss= 0.7214, Training Accuracy= 0.812, Testing Acc= 0.631579, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1530, Reward=-17.047075, Minibatch Loss= 0.4889, Training Accuracy= 0.906, Testing Acc= 0.517544, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1420, Reward=-15.194012, Minibatch Loss= 0.7634, Training Accuracy= 0.812, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1540, Reward=-22.868368, Minibatch Loss= 0.4295, Training Accuracy= 0.922, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1430, Reward=-31.07153, Minibatch Loss= 0.6647, Training Accuracy= 0.844, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1550, Reward=-12.725165, Minibatch Loss= 0.4353, Training Accuracy= 0.922, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1440, Reward=-25.027582, Minibatch Loss= 0.7233, Training Accuracy= 0.844, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1560, Reward=-15.623906, Minibatch Loss= 0.3790, Training Accuracy= 0.938, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1450, Reward=-25.997635, Minibatch Loss= 0.6732, Training Accuracy= 0.859, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1570, Reward=-9.99369, Minibatch Loss= 0.4066, Training Accuracy= 0.938, Testing Acc= 0.552632, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1460, Reward=-33.08646, Minibatch Loss= 0.7338, Training Accuracy= 0.836, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1580, Reward=-10.304175, Minibatch Loss= 0.5020, Training Accuracy= 0.828, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1470, Reward=-21.858673, Minibatch Loss= 0.7175, Training Accuracy= 0.875, Testing Acc= 0.447368, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1590, Reward=-19.067154, Minibatch Loss= 0.5713, Training Accuracy= 0.852, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1480, Reward=-19.641493, Minibatch Loss= 0.5665, Training Accuracy= 0.930, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1600, Reward=-15.091709, Minibatch Loss= 0.5335, Training Accuracy= 0.953, Testing Acc= 0.622807, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1490, Reward=-12.229292, Minibatch Loss= 0.5746, Training Accuracy= 0.906, Testing Acc= 0.456140, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1610, Reward=-16.626036, Minibatch Loss= 0.5213, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1500, Reward=-19.54399, Minibatch Loss= 0.9008, Training Accuracy= 0.742, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1620, Reward=-13.768134, Minibatch Loss= 0.4293, Training Accuracy= 0.898, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1510, Reward=-33.074818, Minibatch Loss= 0.8027, Training Accuracy= 0.820, Testing Acc= 0.578947, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1630, Reward=-15.056237, Minibatch Loss= 0.4146, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1520, Reward=-33.0901, Minibatch Loss= 0.7045, Training Accuracy= 0.891, Testing Acc= 0.622807, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1640, Reward=-19.59074, Minibatch Loss= 0.4341, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1530, Reward=-18.890276, Minibatch Loss= 0.6326, Training Accuracy= 0.914, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1650, Reward=-29.734875, Minibatch Loss= 0.6034, Training Accuracy= 0.922, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1540, Reward=-17.855587, Minibatch Loss= 0.9786, Training Accuracy= 0.750, Testing Acc= 0.640351, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1660, Reward=-23.18636, Minibatch Loss= 0.7252, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1550, Reward=-23.695019, Minibatch Loss= 0.6465, Training Accuracy= 0.883, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1670, Reward=-16.241646, Minibatch Loss= 0.5423, Training Accuracy= 0.945, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1560, Reward=-13.523316, Minibatch Loss= 0.7454, Training Accuracy= 0.852, Testing Acc= 0.491228, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1680, Reward=-16.162632, Minibatch Loss= 0.4855, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1570, Reward=-21.439287, Minibatch Loss= 0.6748, Training Accuracy= 0.867, Testing Acc= 0.491228, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1690, Reward=-9.91088, Minibatch Loss= 0.3954, Training Accuracy= 0.945, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1580, Reward=-12.516876, Minibatch Loss= 0.5568, Training Accuracy= 0.914, Testing Acc= 0.456140, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1700, Reward=-12.785391, Minibatch Loss= 0.5362, Training Accuracy= 0.867, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1590, Reward=-20.760166, Minibatch Loss= 0.5740, Training Accuracy= 0.844, Testing Acc= 0.482456, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1710, Reward=-17.616774, Minibatch Loss= 0.5907, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1600, Reward=-22.27962, Minibatch Loss= 0.5621, Training Accuracy= 0.883, Testing Acc= 0.447368, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1720, Reward=-14.14583, Minibatch Loss= 0.5984, Training Accuracy= 0.883, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1610, Reward=-23.080532, Minibatch Loss= 0.7220, Training Accuracy= 0.836, Testing Acc= 0.438596, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1730, Reward=-14.336364, Minibatch Loss= 0.5304, Training Accuracy= 0.867, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1620, Reward=-22.569094, Minibatch Loss= 0.7539, Training Accuracy= 0.789, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1740, Reward=-24.648478, Minibatch Loss= 0.5254, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1630, Reward=-28.922716, Minibatch Loss= 0.6792, Training Accuracy= 0.891, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1750, Reward=-24.754177, Minibatch Loss= 0.5505, Training Accuracy= 0.867, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1760, Reward=-26.662418, Minibatch Loss= 0.4979, Training Accuracy= 0.922, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1640, Reward=-21.711622, Minibatch Loss= 0.7137, Training Accuracy= 0.867, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1770, Reward=-26.783285, Minibatch Loss= 0.7791, Training Accuracy= 0.680, Testing Acc= 0.517544, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1650, Reward=-20.027317, Minibatch Loss= 0.7232, Training Accuracy= 0.781, Testing Acc= 0.429825, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1780, Reward=-27.7979, Minibatch Loss= 0.5659, Training Accuracy= 0.898, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1660, Reward=-18.600994, Minibatch Loss= 0.6897, Training Accuracy= 0.844, Testing Acc= 0.447368, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1790, Reward=-25.795057, Minibatch Loss= 0.5876, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1670, Reward=-17.822289, Minibatch Loss= 0.6082, Training Accuracy= 0.891, Testing Acc= 0.438596, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1800, Reward=-27.317156, Minibatch Loss= 0.8173, Training Accuracy= 0.758, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1680, Reward=-17.294077, Minibatch Loss= 0.5417, Training Accuracy= 0.898, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1810, Reward=-33.383564, Minibatch Loss= 0.6845, Training Accuracy= 0.758, Testing Acc= 0.649123, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1690, Reward=-18.890722, Minibatch Loss= 0.4961, Training Accuracy= 0.906, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1820, Reward=-25.441566, Minibatch Loss= 0.6713, Training Accuracy= 0.867, Testing Acc= 0.649123, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1700, Reward=-14.049082, Minibatch Loss= 0.4824, Training Accuracy= 0.898, Testing Acc= 0.543860, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1830, Reward=-21.171682, Minibatch Loss= 0.5664, Training Accuracy= 0.930, Testing Acc= 0.657895, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1710, Reward=-23.951365, Minibatch Loss= 0.7003, Training Accuracy= 0.758, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1840, Reward=-10.850817, Minibatch Loss= 0.7914, Training Accuracy= 0.820, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1720, Reward=-25.409702, Minibatch Loss= 0.6833, Training Accuracy= 0.914, Testing Acc= 0.535088, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1850, Reward=-7.225246, Minibatch Loss= 0.9208, Training Accuracy= 0.781, Testing Acc= 0.710526, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1730, Reward=-22.314184, Minibatch Loss= 0.7860, Training Accuracy= 0.844, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1860, Reward=-30.789785, Minibatch Loss= 0.6511, Training Accuracy= 0.859, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1740, Reward=-26.917791, Minibatch Loss= 0.7178, Training Accuracy= 0.867, Testing Acc= 0.578947, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1870, Reward=-26.03111, Minibatch Loss= 0.6038, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1750, Reward=-16.56657, Minibatch Loss= 0.6870, Training Accuracy= 0.898, Testing Acc= 0.429825, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1880, Reward=-21.314732, Minibatch Loss= 0.5322, Training Accuracy= 0.891, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1760, Reward=-19.760431, Minibatch Loss= 0.6829, Training Accuracy= 0.867, Testing Acc= 0.482456, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1890, Reward=-27.60836, Minibatch Loss= 0.5345, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1770, Reward=-20.716019, Minibatch Loss= 0.6489, Training Accuracy= 0.898, Testing Acc= 0.491228, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1900, Reward=-12.959742, Minibatch Loss= 0.5103, Training Accuracy= 0.906, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1780, Reward=-20.096214, Minibatch Loss= 0.5864, Training Accuracy= 0.852, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1910, Reward=-21.10683, Minibatch Loss= 0.4350, Training Accuracy= 0.914, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1790, Reward=-21.27935, Minibatch Loss= 0.6802, Training Accuracy= 0.883, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1920, Reward=-8.845234, Minibatch Loss= 0.6004, Training Accuracy= 0.867, Testing Acc= 0.736842, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1800, Reward=-22.80767, Minibatch Loss= 0.6108, Training Accuracy= 0.883, Testing Acc= 0.447368, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1930, Reward=-13.697762, Minibatch Loss= 0.4796, Training Accuracy= 0.883, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1810, Reward=-16.606735, Minibatch Loss= 0.4898, Training Accuracy= 0.922, Testing Acc= 0.447368, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1940, Reward=-17.042866, Minibatch Loss= 0.5335, Training Accuracy= 0.828, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1820, Reward=-23.383541, Minibatch Loss= 0.5036, Training Accuracy= 0.875, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1950, Reward=-16.476439, Minibatch Loss= 0.4659, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1830, Reward=-26.478666, Minibatch Loss= 0.7193, Training Accuracy= 0.836, Testing Acc= 0.640351, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1960, Reward=-14.182901, Minibatch Loss= 0.4278, Training Accuracy= 0.922, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1840, Reward=-26.712248, Minibatch Loss= 0.6704, Training Accuracy= 0.906, Testing Acc= 0.491228, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1970, Reward=-11.72211, Minibatch Loss= 0.3716, Training Accuracy= 0.969, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1850, Reward=-17.141533, Minibatch Loss= 0.5936, Training Accuracy= 0.906, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1980, Reward=-16.654083, Minibatch Loss= 0.6629, Training Accuracy= 0.844, Testing Acc= 0.649123, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1860, Reward=-13.553712, Minibatch Loss= 0.8832, Training Accuracy= 0.867, Testing Acc= 0.649123, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 1990, Reward=-16.305973, Minibatch Loss= 0.5075, Training Accuracy= 0.867, Testing Acc= 0.622807, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1870, Reward=-19.866505, Minibatch Loss= 0.5837, Training Accuracy= 0.953, Testing Acc= 0.631579, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2000, Reward=-20.14996, Minibatch Loss= 0.5468, Training Accuracy= 0.867, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1880, Reward=-22.700184, Minibatch Loss= 0.7084, Training Accuracy= 0.867, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2010, Reward=-20.238815, Minibatch Loss= 0.5047, Training Accuracy= 0.922, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1890, Reward=-20.294065, Minibatch Loss= 0.6438, Training Accuracy= 0.852, Testing Acc= 0.622807, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2020, Reward=-16.641647, Minibatch Loss= 0.4255, Training Accuracy= 0.961, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1900, Reward=-16.76318, Minibatch Loss= 0.5153, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2030, Reward=-17.104485, Minibatch Loss= 0.5568, Training Accuracy= 0.844, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1910, Reward=-9.410108, Minibatch Loss= 0.5582, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2040, Reward=-11.61908, Minibatch Loss= 0.5014, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1920, Reward=-18.381433, Minibatch Loss= 0.6190, Training Accuracy= 0.867, Testing Acc= 0.500000, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2050, Reward=-19.874634, Minibatch Loss= 0.4873, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1930, Reward=-19.552868, Minibatch Loss= 0.5249, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2060, Reward=-17.863188, Minibatch Loss= 0.5078, Training Accuracy= 0.844, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1940, Reward=-25.91298, Minibatch Loss= 0.5435, Training Accuracy= 0.938, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2070, Reward=-20.724335, Minibatch Loss= 0.4354, Training Accuracy= 0.898, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1950, Reward=-18.505665, Minibatch Loss= 0.5934, Training Accuracy= 0.836, Testing Acc= 0.482456, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2080, Reward=-10.254283, Minibatch Loss= 0.4504, Training Accuracy= 0.906, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1960, Reward=-20.720625, Minibatch Loss= 0.4919, Training Accuracy= 0.914, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2090, Reward=-14.893798, Minibatch Loss= 0.3652, Training Accuracy= 0.969, Testing Acc= 0.622807, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1970, Reward=-12.686169, Minibatch Loss= 0.5268, Training Accuracy= 0.883, Testing Acc= 0.491228, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2100, Reward=-17.041103, Minibatch Loss= 0.4782, Training Accuracy= 0.867, Testing Acc= 0.517544, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1980, Reward=-22.803085, Minibatch Loss= 0.5754, Training Accuracy= 0.828, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2110, Reward=-11.637272, Minibatch Loss= 0.4504, Training Accuracy= 0.883, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 1990, Reward=-27.631012, Minibatch Loss= 0.8160, Training Accuracy= 0.820, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2120, Reward=-11.793573, Minibatch Loss= 0.4187, Training Accuracy= 0.883, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2000, Reward=-36.66, Minibatch Loss= 0.8968, Training Accuracy= 0.820, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2130, Reward=-17.104017, Minibatch Loss= 0.5328, Training Accuracy= 0.859, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2010, Reward=-24.384998, Minibatch Loss= 0.7955, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2140, Reward=-14.248412, Minibatch Loss= 0.3618, Training Accuracy= 0.914, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2020, Reward=-22.894474, Minibatch Loss= 0.8872, Training Accuracy= 0.766, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2150, Reward=-21.161407, Minibatch Loss= 0.5128, Training Accuracy= 0.852, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2030, Reward=-28.681759, Minibatch Loss= 0.7426, Training Accuracy= 0.867, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2160, Reward=-18.105064, Minibatch Loss= 0.5212, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2040, Reward=-28.235561, Minibatch Loss= 0.6358, Training Accuracy= 0.898, Testing Acc= 0.578947, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2170, Reward=-1.9628837, Minibatch Loss= 0.5011, Training Accuracy= 0.898, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2050, Reward=-17.979847, Minibatch Loss= 0.7936, Training Accuracy= 0.844, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2180, Reward=-13.7523, Minibatch Loss= 0.4018, Training Accuracy= 0.898, Testing Acc= 0.517544, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2060, Reward=-23.665073, Minibatch Loss= 0.6968, Training Accuracy= 0.828, Testing Acc= 0.578947, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2190, Reward=-22.062647, Minibatch Loss= 0.3740, Training Accuracy= 0.945, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2070, Reward=-30.582474, Minibatch Loss= 0.6824, Training Accuracy= 0.844, Testing Acc= 0.596491, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2200, Reward=-6.049965, Minibatch Loss= 0.4162, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2080, Reward=-18.1656, Minibatch Loss= 0.6524, Training Accuracy= 0.930, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2210, Reward=-18.96018, Minibatch Loss= 0.7504, Training Accuracy= 0.766, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2090, Reward=-23.914371, Minibatch Loss= 0.5743, Training Accuracy= 0.906, Testing Acc= 0.429825, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2220, Reward=-20.628613, Minibatch Loss= 0.5448, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2100, Reward=-19.988228, Minibatch Loss= 0.5431, Training Accuracy= 0.906, Testing Acc= 0.447368, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2230, Reward=-9.984524, Minibatch Loss= 0.5617, Training Accuracy= 0.805, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2110, Reward=-8.668995, Minibatch Loss= 0.5575, Training Accuracy= 0.898, Testing Acc= 0.447368, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2240, Reward=-20.028889, Minibatch Loss= 0.4845, Training Accuracy= 0.875, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2120, Reward=-19.139494, Minibatch Loss= 0.5805, Training Accuracy= 0.883, Testing Acc= 0.491228, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2250, Reward=-12.771963, Minibatch Loss= 0.3655, Training Accuracy= 0.922, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2130, Reward=-18.54601, Minibatch Loss= 0.7657, Training Accuracy= 0.773, Testing Acc= 0.412281, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2260, Reward=-9.505259, Minibatch Loss= 0.4106, Training Accuracy= 0.945, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2140, Reward=-31.599243, Minibatch Loss= 0.5988, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2270, Reward=-20.1717, Minibatch Loss= 0.4202, Training Accuracy= 0.938, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2150, Reward=-25.889912, Minibatch Loss= 0.5744, Training Accuracy= 0.867, Testing Acc= 0.456140, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2280, Reward=-11.267233, Minibatch Loss= 0.4424, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2160, Reward=-20.194563, Minibatch Loss= 0.5766, Training Accuracy= 0.875, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2290, Reward=-15.89828, Minibatch Loss= 0.7023, Training Accuracy= 0.828, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2170, Reward=-12.1060505, Minibatch Loss= 0.6934, Training Accuracy= 0.852, Testing Acc= 0.491228, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2300, Reward=-20.96615, Minibatch Loss= 0.7503, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2180, Reward=-22.20306, Minibatch Loss= 0.6575, Training Accuracy= 0.898, Testing Acc= 0.482456, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2310, Reward=-15.415725, Minibatch Loss= 0.7883, Training Accuracy= 0.852, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2190, Reward=-23.971924, Minibatch Loss= 0.8184, Training Accuracy= 0.766, Testing Acc= 0.640351, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2320, Reward=-10.178673, Minibatch Loss= 0.6105, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2200, Reward=-26.439661, Minibatch Loss= 0.6964, Training Accuracy= 0.898, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2330, Reward=-11.709145, Minibatch Loss= 0.5139, Training Accuracy= 0.891, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2340, Reward=-18.991539, Minibatch Loss= 0.4445, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2210, Reward=-22.671741, Minibatch Loss= 0.7083, Training Accuracy= 0.875, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2350, Reward=-12.53644, Minibatch Loss= 0.4565, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2220, Reward=-31.16763, Minibatch Loss= 0.6900, Training Accuracy= 0.859, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2360, Reward=-18.993715, Minibatch Loss= 0.4576, Training Accuracy= 0.891, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2230, Reward=-30.115194, Minibatch Loss= 0.8503, Training Accuracy= 0.859, Testing Acc= 0.719298, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2370, Reward=-10.001816, Minibatch Loss= 0.4745, Training Accuracy= 0.906, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2240, Reward=-33.06515, Minibatch Loss= 0.8503, Training Accuracy= 0.844, Testing Acc= 0.719298, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2380, Reward=-8.80246, Minibatch Loss= 0.4257, Training Accuracy= 0.930, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2250, Reward=-42.321175, Minibatch Loss= 0.8652, Training Accuracy= 0.820, Testing Acc= 0.640351, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2390, Reward=-18.994646, Minibatch Loss= 0.4065, Training Accuracy= 0.930, Testing Acc= 0.517544, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2260, Reward=-22.13261, Minibatch Loss= 0.7752, Training Accuracy= 0.875, Testing Acc= 0.675439, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2400, Reward=-12.115286, Minibatch Loss= 0.5118, Training Accuracy= 0.852, Testing Acc= 0.640351, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2270, Reward=-27.805826, Minibatch Loss= 0.7702, Training Accuracy= 0.875, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2410, Reward=-13.045856, Minibatch Loss= 0.4337, Training Accuracy= 0.922, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2280, Reward=-21.505032, Minibatch Loss= 0.8156, Training Accuracy= 0.750, Testing Acc= 0.666667, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2420, Reward=-8.212942, Minibatch Loss= 0.3796, Training Accuracy= 0.953, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2290, Reward=-21.109735, Minibatch Loss= 0.6668, Training Accuracy= 0.859, Testing Acc= 0.684211, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2430, Reward=-13.938522, Minibatch Loss= 0.4933, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2300, Reward=-22.85204, Minibatch Loss= 0.5371, Training Accuracy= 0.914, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2440, Reward=-22.575523, Minibatch Loss= 0.4139, Training Accuracy= 0.938, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2310, Reward=-18.51372, Minibatch Loss= 0.5756, Training Accuracy= 0.898, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2450, Reward=-16.548683, Minibatch Loss= 0.4412, Training Accuracy= 0.930, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2320, Reward=-18.714674, Minibatch Loss= 0.5401, Training Accuracy= 0.945, Testing Acc= 0.482456, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2460, Reward=-8.627089, Minibatch Loss= 0.4858, Training Accuracy= 0.875, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2330, Reward=-17.673407, Minibatch Loss= 0.9103, Training Accuracy= 0.703, Testing Acc= 0.684211, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2470, Reward=-19.458591, Minibatch Loss= 0.5942, Training Accuracy= 0.852, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2340, Reward=-24.200064, Minibatch Loss= 0.9191, Training Accuracy= 0.852, Testing Acc= 0.701754, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2480, Reward=-24.714752, Minibatch Loss= 0.4863, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2350, Reward=-26.605642, Minibatch Loss= 0.8369, Training Accuracy= 0.859, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2490, Reward=-9.187927, Minibatch Loss= 0.4981, Training Accuracy= 0.891, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2360, Reward=-33.847393, Minibatch Loss= 0.9576, Training Accuracy= 0.789, Testing Acc= 0.640351, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2500, Reward=-16.006767, Minibatch Loss= 0.4679, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2370, Reward=-24.120193, Minibatch Loss= 1.0134, Training Accuracy= 0.758, Testing Acc= 0.666667, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2510, Reward=-10.993645, Minibatch Loss= 0.4028, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2380, Reward=-26.540428, Minibatch Loss= 0.7029, Training Accuracy= 0.859, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2520, Reward=-19.874186, Minibatch Loss= 0.3293, Training Accuracy= 0.938, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2390, Reward=-17.700039, Minibatch Loss= 0.8169, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2530, Reward=-5.3728724, Minibatch Loss= 0.5269, Training Accuracy= 0.852, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2400, Reward=-27.192602, Minibatch Loss= 0.7060, Training Accuracy= 0.852, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2540, Reward=-17.904345, Minibatch Loss= 0.6450, Training Accuracy= 0.836, Testing Acc= 0.745614, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2550, Reward=-9.98865, Minibatch Loss= 0.5464, Training Accuracy= 0.914, Testing Acc= 0.763158, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2410, Reward=-16.299854, Minibatch Loss= 0.7002, Training Accuracy= 0.812, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2560, Reward=-14.202799, Minibatch Loss= 0.4169, Training Accuracy= 0.984, Testing Acc= 0.745614, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2420, Reward=-22.566692, Minibatch Loss= 0.6703, Training Accuracy= 0.836, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2570, Reward=-11.987591, Minibatch Loss= 0.4568, Training Accuracy= 0.867, Testing Acc= 0.719298, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2430, Reward=-24.797812, Minibatch Loss= 0.8456, Training Accuracy= 0.820, Testing Acc= 0.657895, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2580, Reward=-16.443468, Minibatch Loss= 0.5379, Training Accuracy= 0.891, Testing Acc= 0.745614, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2440, Reward=-23.232372, Minibatch Loss= 0.6744, Training Accuracy= 0.859, Testing Acc= 0.666667, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2590, Reward=-8.322384, Minibatch Loss= 0.5018, Training Accuracy= 0.922, Testing Acc= 0.701754, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2450, Reward=-22.85808, Minibatch Loss= 1.0387, Training Accuracy= 0.805, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2600, Reward=-9.547914, Minibatch Loss= 0.4621, Training Accuracy= 0.906, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2460, Reward=-33.88252, Minibatch Loss= 0.6619, Training Accuracy= 0.898, Testing Acc= 0.649123, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2610, Reward=-18.749924, Minibatch Loss= 0.4544, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2470, Reward=-24.712557, Minibatch Loss= 0.8568, Training Accuracy= 0.820, Testing Acc= 0.675439, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2620, Reward=-10.882011, Minibatch Loss= 0.5231, Training Accuracy= 0.906, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2480, Reward=-21.60588, Minibatch Loss= 0.6070, Training Accuracy= 0.914, Testing Acc= 0.666667, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2630, Reward=-17.625944, Minibatch Loss= 0.3976, Training Accuracy= 0.914, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2490, Reward=-15.441726, Minibatch Loss= 0.6350, Training Accuracy= 0.828, Testing Acc= 0.649123, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2640, Reward=-13.13158, Minibatch Loss= 0.4339, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2500, Reward=-13.1945505, Minibatch Loss= 0.5040, Training Accuracy= 0.914, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2650, Reward=-8.997316, Minibatch Loss= 0.4401, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2510, Reward=-19.00406, Minibatch Loss= 0.4348, Training Accuracy= 0.930, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2660, Reward=-5.034887, Minibatch Loss= 0.4651, Training Accuracy= 0.867, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2520, Reward=-19.684174, Minibatch Loss= 0.5390, Training Accuracy= 0.867, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2670, Reward=-10.071205, Minibatch Loss= 0.3296, Training Accuracy= 0.945, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2530, Reward=-9.233909, Minibatch Loss= 0.5421, Training Accuracy= 0.867, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2680, Reward=-13.328164, Minibatch Loss= 0.3605, Training Accuracy= 0.914, Testing Acc= 0.649123, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2540, Reward=-14.005331, Minibatch Loss= 0.4586, Training Accuracy= 0.953, Testing Acc= 0.456140, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2690, Reward=-9.130407, Minibatch Loss= 0.5008, Training Accuracy= 0.875, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2550, Reward=-17.49718, Minibatch Loss= 0.6079, Training Accuracy= 0.836, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2700, Reward=-16.07209, Minibatch Loss= 0.7026, Training Accuracy= 0.836, Testing Acc= 0.736842, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2560, Reward=-16.04708, Minibatch Loss= 0.6126, Training Accuracy= 0.852, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2710, Reward=-17.081663, Minibatch Loss= 0.6268, Training Accuracy= 0.953, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2570, Reward=-17.270498, Minibatch Loss= 0.6040, Training Accuracy= 0.852, Testing Acc= 0.640351, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2720, Reward=-23.567122, Minibatch Loss= 0.6154, Training Accuracy= 0.875, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2580, Reward=-19.588512, Minibatch Loss= 0.5932, Training Accuracy= 0.883, Testing Acc= 0.622807, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2730, Reward=-20.061954, Minibatch Loss= 0.4519, Training Accuracy= 0.953, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2590, Reward=-12.179153, Minibatch Loss= 0.5871, Training Accuracy= 0.867, Testing Acc= 0.622807, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2740, Reward=-12.563513, Minibatch Loss= 0.6146, Training Accuracy= 0.922, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2600, Reward=-21.103642, Minibatch Loss= 0.6854, Training Accuracy= 0.859, Testing Acc= 0.587719, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2750, Reward=-20.753124, Minibatch Loss= 0.5547, Training Accuracy= 0.938, Testing Acc= 0.763158, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2610, Reward=-13.3001375, Minibatch Loss= 0.6855, Training Accuracy= 0.812, Testing Acc= 0.429825, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2760, Reward=-13.9959, Minibatch Loss= 0.5145, Training Accuracy= 0.891, Testing Acc= 0.763158, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2620, Reward=-15.906506, Minibatch Loss= 0.7406, Training Accuracy= 0.844, Testing Acc= 0.412281, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2770, Reward=-13.494366, Minibatch Loss= 0.5839, Training Accuracy= 0.914, Testing Acc= 0.745614, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2630, Reward=-23.849262, Minibatch Loss= 0.6836, Training Accuracy= 0.891, Testing Acc= 0.412281, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2780, Reward=-16.044285, Minibatch Loss= 0.4182, Training Accuracy= 0.938, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2640, Reward=-24.847801, Minibatch Loss= 0.5814, Training Accuracy= 0.898, Testing Acc= 0.429825, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2790, Reward=-20.226763, Minibatch Loss= 0.3954, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2650, Reward=-17.326468, Minibatch Loss= 0.7263, Training Accuracy= 0.852, Testing Acc= 0.473684, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2800, Reward=-4.166525, Minibatch Loss= 0.4438, Training Accuracy= 0.906, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2660, Reward=-20.39883, Minibatch Loss= 0.5901, Training Accuracy= 0.898, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2810, Reward=-12.620182, Minibatch Loss= 0.3846, Training Accuracy= 0.938, Testing Acc= 0.675439, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2670, Reward=-12.358156, Minibatch Loss= 0.4435, Training Accuracy= 0.938, Testing Acc= 0.456140, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2820, Reward=-8.2980585, Minibatch Loss= 0.3654, Training Accuracy= 0.969, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2680, Reward=-12.315037, Minibatch Loss= 0.5803, Training Accuracy= 0.820, Testing Acc= 0.447368, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2830, Reward=-12.2516365, Minibatch Loss= 0.3038, Training Accuracy= 0.961, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2690, Reward=-24.16917, Minibatch Loss= 0.3793, Training Accuracy= 0.930, Testing Acc= 0.482456, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2840, Reward=-13.321647, Minibatch Loss= 0.4060, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2700, Reward=-6.1810694, Minibatch Loss= 0.5952, Training Accuracy= 0.812, Testing Acc= 0.500000, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2850, Reward=-17.090225, Minibatch Loss= 0.5889, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2710, Reward=-23.584385, Minibatch Loss= 0.4232, Training Accuracy= 0.930, Testing Acc= 0.464912, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2860, Reward=-13.315029, Minibatch Loss= 0.4942, Training Accuracy= 0.875, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2870, Reward=-14.394634, Minibatch Loss= 0.3592, Training Accuracy= 0.945, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2720, Reward=-9.798099, Minibatch Loss= 0.5175, Training Accuracy= 0.898, Testing Acc= 0.622807, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2880, Reward=-22.347565, Minibatch Loss= 0.5055, Training Accuracy= 0.891, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2730, Reward=-18.136154, Minibatch Loss= 0.4390, Training Accuracy= 0.945, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2890, Reward=-13.2148285, Minibatch Loss= 0.4682, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2740, Reward=-9.019374, Minibatch Loss= 0.4603, Training Accuracy= 0.883, Testing Acc= 0.464912, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2900, Reward=-19.551361, Minibatch Loss= 0.6991, Training Accuracy= 0.812, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2750, Reward=-14.796946, Minibatch Loss= 0.4361, Training Accuracy= 0.922, Testing Acc= 0.482456, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2910, Reward=-19.810223, Minibatch Loss= 0.5050, Training Accuracy= 0.938, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2760, Reward=-10.940182, Minibatch Loss= 0.4400, Training Accuracy= 0.891, Testing Acc= 0.500000, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2920, Reward=-14.3597975, Minibatch Loss= 0.5230, Training Accuracy= 0.930, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2770, Reward=-12.946702, Minibatch Loss= 0.3932, Training Accuracy= 0.930, Testing Acc= 0.491228, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2930, Reward=-4.7031665, Minibatch Loss= 0.5125, Training Accuracy= 0.930, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2780, Reward=-14.844616, Minibatch Loss= 0.3319, Training Accuracy= 0.945, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2940, Reward=-13.719326, Minibatch Loss= 0.4614, Training Accuracy= 0.883, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2790, Reward=-15.693134, Minibatch Loss= 0.3755, Training Accuracy= 0.922, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2950, Reward=-16.5735, Minibatch Loss= 0.4539, Training Accuracy= 0.875, Testing Acc= 0.517544, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2800, Reward=-16.028877, Minibatch Loss= 0.3996, Training Accuracy= 0.930, Testing Acc= 0.456140, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2960, Reward=-21.055237, Minibatch Loss= 0.4646, Training Accuracy= 0.891, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2810, Reward=-16.598532, Minibatch Loss= 0.3777, Training Accuracy= 0.938, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2970, Reward=-18.489216, Minibatch Loss= 0.6984, Training Accuracy= 0.820, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2820, Reward=-6.9674067, Minibatch Loss= 0.4470, Training Accuracy= 0.867, Testing Acc= 0.482456, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2980, Reward=-13.383497, Minibatch Loss= 0.4379, Training Accuracy= 0.930, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2830, Reward=-16.603159, Minibatch Loss= 0.5745, Training Accuracy= 0.883, Testing Acc= 0.464912, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2990, Reward=-14.066687, Minibatch Loss= 0.3748, Training Accuracy= 0.930, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.881348, Max AP=  0.898198\n",
      "Step 2840, Reward=-16.267998, Minibatch Loss= 0.4026, Training Accuracy= 0.930, Testing Acc= 0.464912, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Optimization Finished!\n",
      "Testing Accuracy: 0.80701756\n",
      "Step 2850, Reward=-15.168155, Minibatch Loss= 0.3893, Training Accuracy= 0.922, Testing Acc= 0.456140, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2860, Reward=-11.529247, Minibatch Loss= 0.3660, Training Accuracy= 0.922, Testing Acc= 0.500000, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 10, Reward=-63.599827, Minibatch Loss= 1.0446, Training Accuracy= 0.664, Testing Acc= 0.640351, Max Final Accuracy=  0.640351, Max AUC=  0.760080, Max AP=  0.822435\n",
      "Step 2870, Reward=-12.509094, Minibatch Loss= 0.4211, Training Accuracy= 0.859, Testing Acc= 0.491228, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 2880, Reward=-16.23541, Minibatch Loss= 0.7258, Training Accuracy= 0.789, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 20, Reward=-43.27861, Minibatch Loss= 0.7607, Training Accuracy= 0.578, Testing Acc= 0.771930, Max Final Accuracy=  0.771930, Max AUC=  0.753463, Max AP=  0.805863\n",
      "Step 2890, Reward=-25.333977, Minibatch Loss= 0.5838, Training Accuracy= 0.938, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 30, Reward=-60.47671, Minibatch Loss= 0.8256, Training Accuracy= 0.656, Testing Acc= 0.780702, Max Final Accuracy=  0.780702, Max AUC=  0.822099, Max AP=  0.857494\n",
      "Step 2900, Reward=-17.348831, Minibatch Loss= 0.5958, Training Accuracy= 0.875, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 40, Reward=-45.566444, Minibatch Loss= 0.6323, Training Accuracy= 0.719, Testing Acc= 0.526316, Max Final Accuracy=  0.780702, Max AUC=  0.822099, Max AP=  0.857494\n",
      "Step 2910, Reward=-23.843657, Minibatch Loss= 0.5387, Training Accuracy= 0.828, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 50, Reward=-39.057327, Minibatch Loss= 0.5468, Training Accuracy= 0.750, Testing Acc= 0.500000, Max Final Accuracy=  0.780702, Max AUC=  0.822099, Max AP=  0.857494\n",
      "Step 2920, Reward=-23.951427, Minibatch Loss= 0.4298, Training Accuracy= 0.977, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 60, Reward=-35.833782, Minibatch Loss= 0.4775, Training Accuracy= 0.836, Testing Acc= 0.535088, Max Final Accuracy=  0.780702, Max AUC=  0.822099, Max AP=  0.857494\n",
      "Step 2930, Reward=-14.421972, Minibatch Loss= 0.4839, Training Accuracy= 0.883, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 70, Reward=-29.014174, Minibatch Loss= 0.6083, Training Accuracy= 0.852, Testing Acc= 0.491228, Max Final Accuracy=  0.780702, Max AUC=  0.822099, Max AP=  0.857494\n",
      "Step 2940, Reward=-13.266406, Minibatch Loss= 0.4370, Training Accuracy= 0.938, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 80, Reward=-29.529982, Minibatch Loss= 0.6537, Training Accuracy= 0.797, Testing Acc= 0.675439, Max Final Accuracy=  0.780702, Max AUC=  0.822099, Max AP=  0.857494\n",
      "Step 2950, Reward=-6.686095, Minibatch Loss= 0.4270, Training Accuracy= 0.906, Testing Acc= 0.517544, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 90, Reward=-25.090778, Minibatch Loss= 0.5763, Training Accuracy= 0.812, Testing Acc= 0.710526, Max Final Accuracy=  0.780702, Max AUC=  0.822099, Max AP=  0.857494\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2960, Reward=-11.675556, Minibatch Loss= 0.3773, Training Accuracy= 0.945, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 100, Reward=-32.812298, Minibatch Loss= 0.6240, Training Accuracy= 0.812, Testing Acc= 0.508772, Max Final Accuracy=  0.780702, Max AUC=  0.822099, Max AP=  0.857494\n",
      "Step 2970, Reward=-11.492248, Minibatch Loss= 0.4186, Training Accuracy= 0.898, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 110, Reward=-24.266327, Minibatch Loss= 0.6525, Training Accuracy= 0.820, Testing Acc= 0.719298, Max Final Accuracy=  0.780702, Max AUC=  0.822099, Max AP=  0.857494\n",
      "Step 2980, Reward=-12.906388, Minibatch Loss= 0.4197, Training Accuracy= 0.906, Testing Acc= 0.482456, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 120, Reward=-20.51339, Minibatch Loss= 0.6026, Training Accuracy= 0.820, Testing Acc= 0.789474, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 2990, Reward=-12.58937, Minibatch Loss= 0.5854, Training Accuracy= 0.867, Testing Acc= 0.491228, Max Final Accuracy=  0.789474, Max AUC=  0.843798, Max AP=  0.848728\n",
      "Step 130, Reward=-32.96334, Minibatch Loss= 0.6260, Training Accuracy= 0.820, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Optimization Finished!\n",
      "Testing Accuracy: 0.7894737\n",
      "Step 140, Reward=-23.075705, Minibatch Loss= 0.6646, Training Accuracy= 0.828, Testing Acc= 0.535088, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 150, Reward=-24.344566, Minibatch Loss= 0.5971, Training Accuracy= 0.844, Testing Acc= 0.640351, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 10, Reward=-74.21386, Minibatch Loss= 1.1092, Training Accuracy= 0.609, Testing Acc= 0.649123, Max Final Accuracy=  0.649123, Max AUC=  0.763927, Max AP=  0.825991\n",
      "Step 160, Reward=-22.08117, Minibatch Loss= 0.5750, Training Accuracy= 0.906, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 20, Reward=-65.80384, Minibatch Loss= 0.7873, Training Accuracy= 0.539, Testing Acc= 0.587719, Max Final Accuracy=  0.649123, Max AUC=  0.763927, Max AP=  0.825991\n",
      "Step 170, Reward=-28.98111, Minibatch Loss= 0.6642, Training Accuracy= 0.805, Testing Acc= 0.631579, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 180, Reward=-17.1963, Minibatch Loss= 0.5823, Training Accuracy= 0.867, Testing Acc= 0.622807, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 30, Reward=-51.182163, Minibatch Loss= 0.6605, Training Accuracy= 0.766, Testing Acc= 0.780702, Max Final Accuracy=  0.780702, Max AUC=  0.788550, Max AP=  0.831383\n",
      "Step 190, Reward=-33.050854, Minibatch Loss= 0.5976, Training Accuracy= 0.859, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 40, Reward=-34.21287, Minibatch Loss= 0.6562, Training Accuracy= 0.719, Testing Acc= 0.675439, Max Final Accuracy=  0.780702, Max AUC=  0.788550, Max AP=  0.831383\n",
      "Step 200, Reward=-22.560764, Minibatch Loss= 0.5403, Training Accuracy= 0.867, Testing Acc= 0.745614, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 50, Reward=-35.302696, Minibatch Loss= 0.5738, Training Accuracy= 0.812, Testing Acc= 0.491228, Max Final Accuracy=  0.780702, Max AUC=  0.788550, Max AP=  0.831383\n",
      "Step 210, Reward=-22.239582, Minibatch Loss= 0.5654, Training Accuracy= 0.828, Testing Acc= 0.789474, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 60, Reward=-34.012596, Minibatch Loss= 0.5986, Training Accuracy= 0.758, Testing Acc= 0.710526, Max Final Accuracy=  0.780702, Max AUC=  0.788550, Max AP=  0.831383\n",
      "Step 220, Reward=-24.745096, Minibatch Loss= 0.5919, Training Accuracy= 0.867, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 70, Reward=-29.437603, Minibatch Loss= 0.6391, Training Accuracy= 0.836, Testing Acc= 0.622807, Max Final Accuracy=  0.780702, Max AUC=  0.788550, Max AP=  0.831383\n",
      "Step 230, Reward=-24.237038, Minibatch Loss= 0.6755, Training Accuracy= 0.781, Testing Acc= 0.771930, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 80, Reward=-21.268496, Minibatch Loss= 0.7179, Training Accuracy= 0.781, Testing Acc= 0.570175, Max Final Accuracy=  0.780702, Max AUC=  0.788550, Max AP=  0.831383\n",
      "Step 240, Reward=-31.921646, Minibatch Loss= 0.6666, Training Accuracy= 0.828, Testing Acc= 0.754386, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 90, Reward=-32.277336, Minibatch Loss= 0.7828, Training Accuracy= 0.734, Testing Acc= 0.815789, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 250, Reward=-26.105465, Minibatch Loss= 0.7365, Training Accuracy= 0.789, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 100, Reward=-24.758585, Minibatch Loss= 0.6837, Training Accuracy= 0.750, Testing Acc= 0.763158, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 260, Reward=-30.905525, Minibatch Loss= 0.6487, Training Accuracy= 0.844, Testing Acc= 0.649123, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 110, Reward=-31.598972, Minibatch Loss= 0.6726, Training Accuracy= 0.805, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 270, Reward=-20.752052, Minibatch Loss= 0.7158, Training Accuracy= 0.828, Testing Acc= 0.666667, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 120, Reward=-23.247559, Minibatch Loss= 0.6866, Training Accuracy= 0.797, Testing Acc= 0.701754, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 280, Reward=-22.469582, Minibatch Loss= 0.5842, Training Accuracy= 0.883, Testing Acc= 0.789474, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 130, Reward=-18.861397, Minibatch Loss= 0.6718, Training Accuracy= 0.820, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 290, Reward=-27.302034, Minibatch Loss= 0.5211, Training Accuracy= 0.922, Testing Acc= 0.675439, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 300, Reward=-20.457806, Minibatch Loss= 0.7087, Training Accuracy= 0.875, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 140, Reward=-28.095907, Minibatch Loss= 0.6202, Training Accuracy= 0.820, Testing Acc= 0.491228, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 310, Reward=-26.885513, Minibatch Loss= 0.5898, Training Accuracy= 0.891, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 150, Reward=-24.483387, Minibatch Loss= 0.6221, Training Accuracy= 0.781, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 320, Reward=-26.535465, Minibatch Loss= 0.6129, Training Accuracy= 0.867, Testing Acc= 0.596491, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 160, Reward=-24.318676, Minibatch Loss= 0.5365, Training Accuracy= 0.898, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 330, Reward=-21.533962, Minibatch Loss= 0.5367, Training Accuracy= 0.906, Testing Acc= 0.561404, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 170, Reward=-21.600018, Minibatch Loss= 0.8426, Training Accuracy= 0.828, Testing Acc= 0.666667, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 340, Reward=-24.495783, Minibatch Loss= 0.5265, Training Accuracy= 0.906, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 180, Reward=-36.820263, Minibatch Loss= 0.7207, Training Accuracy= 0.820, Testing Acc= 0.456140, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 350, Reward=-14.616418, Minibatch Loss= 0.9500, Training Accuracy= 0.734, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 190, Reward=-31.530403, Minibatch Loss= 0.6417, Training Accuracy= 0.844, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 360, Reward=-21.867956, Minibatch Loss= 0.7926, Training Accuracy= 0.828, Testing Acc= 0.561404, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 200, Reward=-24.918892, Minibatch Loss= 0.6150, Training Accuracy= 0.781, Testing Acc= 0.438596, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 370, Reward=-26.205685, Minibatch Loss= 0.6844, Training Accuracy= 0.867, Testing Acc= 0.578947, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 210, Reward=-19.408792, Minibatch Loss= 0.7665, Training Accuracy= 0.883, Testing Acc= 0.710526, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 380, Reward=-38.63318, Minibatch Loss= 0.6390, Training Accuracy= 0.844, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 220, Reward=-17.22721, Minibatch Loss= 0.5619, Training Accuracy= 0.875, Testing Acc= 0.719298, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 390, Reward=-27.658014, Minibatch Loss= 0.5736, Training Accuracy= 0.891, Testing Acc= 0.631579, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 230, Reward=-18.034258, Minibatch Loss= 0.6977, Training Accuracy= 0.758, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 400, Reward=-23.983597, Minibatch Loss= 0.5328, Training Accuracy= 0.914, Testing Acc= 0.701754, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 240, Reward=-14.762758, Minibatch Loss= 0.6975, Training Accuracy= 0.797, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 410, Reward=-23.202225, Minibatch Loss= 0.5426, Training Accuracy= 0.852, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 250, Reward=-21.4257, Minibatch Loss= 0.5706, Training Accuracy= 0.859, Testing Acc= 0.456140, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 420, Reward=-15.619585, Minibatch Loss= 0.5947, Training Accuracy= 0.859, Testing Acc= 0.745614, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 260, Reward=-19.147633, Minibatch Loss= 0.6009, Training Accuracy= 0.867, Testing Acc= 0.491228, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 430, Reward=-23.70062, Minibatch Loss= 0.7187, Training Accuracy= 0.812, Testing Acc= 0.684211, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 270, Reward=-27.852524, Minibatch Loss= 0.7543, Training Accuracy= 0.758, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 440, Reward=-29.002455, Minibatch Loss= 0.6984, Training Accuracy= 0.844, Testing Acc= 0.596491, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 280, Reward=-28.277615, Minibatch Loss= 0.7877, Training Accuracy= 0.797, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 450, Reward=-19.89564, Minibatch Loss= 0.6625, Training Accuracy= 0.820, Testing Acc= 0.570175, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 290, Reward=-31.921246, Minibatch Loss= 0.6726, Training Accuracy= 0.852, Testing Acc= 0.710526, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 460, Reward=-28.552214, Minibatch Loss= 0.5954, Training Accuracy= 0.891, Testing Acc= 0.570175, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 300, Reward=-22.393332, Minibatch Loss= 0.6704, Training Accuracy= 0.844, Testing Acc= 0.701754, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 470, Reward=-14.200292, Minibatch Loss= 0.6650, Training Accuracy= 0.852, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 310, Reward=-29.282593, Minibatch Loss= 0.5557, Training Accuracy= 0.898, Testing Acc= 0.692982, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 480, Reward=-16.775936, Minibatch Loss= 0.6942, Training Accuracy= 0.789, Testing Acc= 0.535088, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 320, Reward=-21.821878, Minibatch Loss= 0.6471, Training Accuracy= 0.828, Testing Acc= 0.710526, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 490, Reward=-17.891155, Minibatch Loss= 0.5636, Training Accuracy= 0.859, Testing Acc= 0.535088, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 330, Reward=-21.167511, Minibatch Loss= 0.6520, Training Accuracy= 0.797, Testing Acc= 0.692982, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 500, Reward=-26.86153, Minibatch Loss= 0.6703, Training Accuracy= 0.844, Testing Acc= 0.578947, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 340, Reward=-23.646915, Minibatch Loss= 0.6829, Training Accuracy= 0.852, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 510, Reward=-21.624222, Minibatch Loss= 0.5119, Training Accuracy= 0.891, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 350, Reward=-17.228773, Minibatch Loss= 0.5579, Training Accuracy= 0.883, Testing Acc= 0.745614, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 520, Reward=-22.159662, Minibatch Loss= 0.5113, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 360, Reward=-21.727722, Minibatch Loss= 0.5915, Training Accuracy= 0.828, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 530, Reward=-20.667181, Minibatch Loss= 0.5050, Training Accuracy= 0.914, Testing Acc= 0.771930, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 540, Reward=-21.00446, Minibatch Loss= 0.5883, Training Accuracy= 0.867, Testing Acc= 0.596491, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 370, Reward=-23.152363, Minibatch Loss= 0.5108, Training Accuracy= 0.898, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 550, Reward=-17.825523, Minibatch Loss= 0.5705, Training Accuracy= 0.891, Testing Acc= 0.578947, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 380, Reward=-17.630556, Minibatch Loss= 0.5029, Training Accuracy= 0.906, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 560, Reward=-20.871927, Minibatch Loss= 0.5339, Training Accuracy= 0.891, Testing Acc= 0.570175, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 390, Reward=-23.421236, Minibatch Loss= 0.5799, Training Accuracy= 0.820, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 570, Reward=-24.808151, Minibatch Loss= 0.5028, Training Accuracy= 0.906, Testing Acc= 0.543860, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 400, Reward=-26.730988, Minibatch Loss= 0.5068, Training Accuracy= 0.906, Testing Acc= 0.675439, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 580, Reward=-21.855013, Minibatch Loss= 0.4603, Training Accuracy= 0.930, Testing Acc= 0.561404, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 410, Reward=-21.162052, Minibatch Loss= 0.6133, Training Accuracy= 0.844, Testing Acc= 0.771930, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 590, Reward=-15.253219, Minibatch Loss= 0.6931, Training Accuracy= 0.859, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 420, Reward=-21.107483, Minibatch Loss= 0.5217, Training Accuracy= 0.867, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 600, Reward=-24.896473, Minibatch Loss= 0.6789, Training Accuracy= 0.820, Testing Acc= 0.622807, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 430, Reward=-23.527956, Minibatch Loss= 0.6234, Training Accuracy= 0.867, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 610, Reward=-20.756977, Minibatch Loss= 0.5687, Training Accuracy= 0.891, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 440, Reward=-27.516169, Minibatch Loss= 0.6005, Training Accuracy= 0.898, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 620, Reward=-13.721834, Minibatch Loss= 0.4881, Training Accuracy= 0.922, Testing Acc= 0.754386, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 450, Reward=-24.791918, Minibatch Loss= 0.6121, Training Accuracy= 0.867, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 630, Reward=-20.134872, Minibatch Loss= 0.7560, Training Accuracy= 0.844, Testing Acc= 0.736842, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 460, Reward=-22.501625, Minibatch Loss= 0.6561, Training Accuracy= 0.883, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 640, Reward=-26.030382, Minibatch Loss= 0.5271, Training Accuracy= 0.891, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 470, Reward=-28.825188, Minibatch Loss= 0.6733, Training Accuracy= 0.836, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 650, Reward=-23.05046, Minibatch Loss= 0.6714, Training Accuracy= 0.773, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 480, Reward=-26.20441, Minibatch Loss= 0.6476, Training Accuracy= 0.875, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 660, Reward=-25.639862, Minibatch Loss= 0.6155, Training Accuracy= 0.883, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 490, Reward=-19.33166, Minibatch Loss= 0.5917, Training Accuracy= 0.875, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 670, Reward=-21.030664, Minibatch Loss= 0.6015, Training Accuracy= 0.891, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 500, Reward=-17.474907, Minibatch Loss= 0.6155, Training Accuracy= 0.875, Testing Acc= 0.763158, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 680, Reward=-17.490175, Minibatch Loss= 0.6030, Training Accuracy= 0.820, Testing Acc= 0.587719, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 510, Reward=-19.578342, Minibatch Loss= 0.5062, Training Accuracy= 0.938, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 690, Reward=-21.623623, Minibatch Loss= 0.7007, Training Accuracy= 0.828, Testing Acc= 0.745614, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 520, Reward=-27.739977, Minibatch Loss= 0.6796, Training Accuracy= 0.859, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 700, Reward=-19.324509, Minibatch Loss= 0.6876, Training Accuracy= 0.836, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 530, Reward=-22.207018, Minibatch Loss= 0.6500, Training Accuracy= 0.859, Testing Acc= 0.701754, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 710, Reward=-29.537155, Minibatch Loss= 0.6058, Training Accuracy= 0.852, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 540, Reward=-26.10598, Minibatch Loss= 0.6169, Training Accuracy= 0.875, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 720, Reward=-12.757255, Minibatch Loss= 0.5600, Training Accuracy= 0.883, Testing Acc= 0.622807, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 550, Reward=-17.627117, Minibatch Loss= 0.5671, Training Accuracy= 0.898, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 730, Reward=-17.115398, Minibatch Loss= 0.5222, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 560, Reward=-22.510406, Minibatch Loss= 0.6077, Training Accuracy= 0.875, Testing Acc= 0.710526, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 740, Reward=-20.841343, Minibatch Loss= 0.4780, Training Accuracy= 0.883, Testing Acc= 0.657895, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 750, Reward=-14.801124, Minibatch Loss= 0.4600, Training Accuracy= 0.906, Testing Acc= 0.745614, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 570, Reward=-18.136963, Minibatch Loss= 0.6894, Training Accuracy= 0.812, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 760, Reward=-24.810108, Minibatch Loss= 0.4853, Training Accuracy= 0.906, Testing Acc= 0.780702, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 580, Reward=-21.589045, Minibatch Loss= 0.5607, Training Accuracy= 0.867, Testing Acc= 0.771930, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 770, Reward=-20.340004, Minibatch Loss= 0.5490, Training Accuracy= 0.875, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 590, Reward=-26.33781, Minibatch Loss= 0.7194, Training Accuracy= 0.836, Testing Acc= 0.763158, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 780, Reward=-20.069408, Minibatch Loss= 0.5709, Training Accuracy= 0.883, Testing Acc= 0.780702, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 600, Reward=-27.73864, Minibatch Loss= 0.6821, Training Accuracy= 0.891, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 790, Reward=-29.428452, Minibatch Loss= 0.6305, Training Accuracy= 0.875, Testing Acc= 0.543860, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 610, Reward=-28.689484, Minibatch Loss= 0.6676, Training Accuracy= 0.859, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 800, Reward=-27.844574, Minibatch Loss= 0.6786, Training Accuracy= 0.914, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 620, Reward=-22.76113, Minibatch Loss= 0.6026, Training Accuracy= 0.883, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 810, Reward=-24.5263, Minibatch Loss= 0.7715, Training Accuracy= 0.836, Testing Acc= 0.649123, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 630, Reward=-17.885191, Minibatch Loss= 0.5823, Training Accuracy= 0.898, Testing Acc= 0.692982, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 820, Reward=-24.945461, Minibatch Loss= 0.6985, Training Accuracy= 0.867, Testing Acc= 0.482456, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 640, Reward=-18.120853, Minibatch Loss= 0.6750, Training Accuracy= 0.859, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 830, Reward=-28.40935, Minibatch Loss= 0.7501, Training Accuracy= 0.789, Testing Acc= 0.535088, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 650, Reward=-15.030783, Minibatch Loss= 0.6067, Training Accuracy= 0.844, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 840, Reward=-29.992228, Minibatch Loss= 0.6504, Training Accuracy= 0.906, Testing Acc= 0.535088, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 660, Reward=-21.742666, Minibatch Loss= 0.5057, Training Accuracy= 0.914, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 850, Reward=-19.551723, Minibatch Loss= 0.6920, Training Accuracy= 0.836, Testing Acc= 0.614035, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 670, Reward=-17.559069, Minibatch Loss= 0.6136, Training Accuracy= 0.852, Testing Acc= 0.798246, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 860, Reward=-16.149742, Minibatch Loss= 0.5883, Training Accuracy= 0.898, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 680, Reward=-19.729155, Minibatch Loss= 0.5550, Training Accuracy= 0.867, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 870, Reward=-24.607018, Minibatch Loss= 0.5182, Training Accuracy= 0.922, Testing Acc= 0.543860, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 690, Reward=-29.790554, Minibatch Loss= 0.6613, Training Accuracy= 0.766, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 880, Reward=-21.082138, Minibatch Loss= 0.6956, Training Accuracy= 0.820, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 700, Reward=-17.77507, Minibatch Loss= 0.5732, Training Accuracy= 0.891, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 890, Reward=-19.777695, Minibatch Loss= 0.6046, Training Accuracy= 0.945, Testing Acc= 0.578947, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 710, Reward=-19.449776, Minibatch Loss= 0.5787, Training Accuracy= 0.867, Testing Acc= 0.491228, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 900, Reward=-19.311428, Minibatch Loss= 0.6140, Training Accuracy= 0.891, Testing Acc= 0.543860, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 720, Reward=-22.062998, Minibatch Loss= 0.9610, Training Accuracy= 0.750, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 910, Reward=-35.830353, Minibatch Loss= 0.6586, Training Accuracy= 0.852, Testing Acc= 0.596491, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 730, Reward=-27.953478, Minibatch Loss= 0.7256, Training Accuracy= 0.828, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 920, Reward=-19.791718, Minibatch Loss= 0.5479, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 740, Reward=-21.712206, Minibatch Loss= 0.6852, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 930, Reward=-23.841526, Minibatch Loss= 0.5111, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 750, Reward=-24.17371, Minibatch Loss= 0.5991, Training Accuracy= 0.859, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 940, Reward=-20.161896, Minibatch Loss= 0.6510, Training Accuracy= 0.805, Testing Acc= 0.605263, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 760, Reward=-20.64076, Minibatch Loss= 0.5704, Training Accuracy= 0.883, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 950, Reward=-26.100481, Minibatch Loss= 0.6720, Training Accuracy= 0.922, Testing Acc= 0.763158, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 770, Reward=-19.896204, Minibatch Loss= 0.5498, Training Accuracy= 0.914, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 960, Reward=-20.420168, Minibatch Loss= 0.7210, Training Accuracy= 0.898, Testing Acc= 0.745614, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 780, Reward=-19.447807, Minibatch Loss= 0.5300, Training Accuracy= 0.867, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 970, Reward=-23.496214, Minibatch Loss= 0.7418, Training Accuracy= 0.883, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 790, Reward=-20.991869, Minibatch Loss= 0.6626, Training Accuracy= 0.805, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 980, Reward=-24.88115, Minibatch Loss= 0.6314, Training Accuracy= 0.898, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 800, Reward=-22.655994, Minibatch Loss= 0.5769, Training Accuracy= 0.836, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 990, Reward=-26.557453, Minibatch Loss= 0.6555, Training Accuracy= 0.859, Testing Acc= 0.535088, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 810, Reward=-20.823885, Minibatch Loss= 0.4803, Training Accuracy= 0.906, Testing Acc= 0.701754, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1000, Reward=-24.230703, Minibatch Loss= 0.7373, Training Accuracy= 0.773, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 1010, Reward=-29.384447, Minibatch Loss= 0.7793, Training Accuracy= 0.734, Testing Acc= 0.622807, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 820, Reward=-17.035757, Minibatch Loss= 0.6444, Training Accuracy= 0.820, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1020, Reward=-19.965092, Minibatch Loss= 0.7639, Training Accuracy= 0.859, Testing Acc= 0.622807, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 830, Reward=-19.821156, Minibatch Loss= 0.5199, Training Accuracy= 0.898, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1030, Reward=-18.132256, Minibatch Loss= 0.8400, Training Accuracy= 0.781, Testing Acc= 0.596491, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 840, Reward=-26.92631, Minibatch Loss= 0.6152, Training Accuracy= 0.852, Testing Acc= 0.815789, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1040, Reward=-28.39292, Minibatch Loss= 0.6275, Training Accuracy= 0.891, Testing Acc= 0.570175, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 850, Reward=-18.576544, Minibatch Loss= 0.4926, Training Accuracy= 0.906, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1050, Reward=-24.58905, Minibatch Loss= 0.7250, Training Accuracy= 0.883, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 860, Reward=-18.243113, Minibatch Loss= 0.4837, Training Accuracy= 0.945, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1060, Reward=-21.981781, Minibatch Loss= 0.6777, Training Accuracy= 0.891, Testing Acc= 0.596491, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 870, Reward=-15.675161, Minibatch Loss= 0.5445, Training Accuracy= 0.898, Testing Acc= 0.771930, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1070, Reward=-10.103131, Minibatch Loss= 0.5738, Training Accuracy= 0.914, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 880, Reward=-18.346703, Minibatch Loss= 0.5444, Training Accuracy= 0.883, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1080, Reward=-20.646744, Minibatch Loss= 0.5306, Training Accuracy= 0.914, Testing Acc= 0.640351, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 890, Reward=-15.527603, Minibatch Loss= 0.4557, Training Accuracy= 0.930, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1090, Reward=-19.606075, Minibatch Loss= 0.5380, Training Accuracy= 0.914, Testing Acc= 0.596491, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 900, Reward=-19.59003, Minibatch Loss= 0.5767, Training Accuracy= 0.867, Testing Acc= 0.780702, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1100, Reward=-20.634161, Minibatch Loss= 0.5207, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 910, Reward=-23.785809, Minibatch Loss= 0.6796, Training Accuracy= 0.836, Testing Acc= 0.789474, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1110, Reward=-18.298561, Minibatch Loss= 0.5235, Training Accuracy= 0.898, Testing Acc= 0.719298, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 920, Reward=-27.789564, Minibatch Loss= 0.5557, Training Accuracy= 0.859, Testing Acc= 0.675439, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1120, Reward=-19.889608, Minibatch Loss= 0.7125, Training Accuracy= 0.891, Testing Acc= 0.736842, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 930, Reward=-20.213165, Minibatch Loss= 0.4217, Training Accuracy= 0.961, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1130, Reward=-18.545433, Minibatch Loss= 0.6551, Training Accuracy= 0.828, Testing Acc= 0.692982, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 940, Reward=-12.222817, Minibatch Loss= 0.4675, Training Accuracy= 0.875, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1140, Reward=-19.69451, Minibatch Loss= 0.5012, Training Accuracy= 0.938, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 950, Reward=-12.17686, Minibatch Loss= 0.5135, Training Accuracy= 0.859, Testing Acc= 0.684211, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1150, Reward=-22.0618, Minibatch Loss= 0.7896, Training Accuracy= 0.766, Testing Acc= 0.710526, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 960, Reward=-20.733814, Minibatch Loss= 0.5249, Training Accuracy= 0.875, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1160, Reward=-22.576456, Minibatch Loss= 0.5752, Training Accuracy= 0.836, Testing Acc= 0.657895, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 970, Reward=-16.992798, Minibatch Loss= 0.5401, Training Accuracy= 0.867, Testing Acc= 0.728070, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1170, Reward=-23.690186, Minibatch Loss= 0.8169, Training Accuracy= 0.789, Testing Acc= 0.789474, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 980, Reward=-17.764717, Minibatch Loss= 0.5066, Training Accuracy= 0.898, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1180, Reward=-28.396181, Minibatch Loss= 0.8903, Training Accuracy= 0.828, Testing Acc= 0.508772, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 990, Reward=-17.0425, Minibatch Loss= 0.4817, Training Accuracy= 0.891, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1190, Reward=-28.100056, Minibatch Loss= 0.8844, Training Accuracy= 0.859, Testing Acc= 0.552632, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 1000, Reward=-15.330637, Minibatch Loss= 0.4492, Training Accuracy= 0.891, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1200, Reward=-21.651506, Minibatch Loss= 0.7857, Training Accuracy= 0.883, Testing Acc= 0.543860, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 1010, Reward=-20.555208, Minibatch Loss= 0.5163, Training Accuracy= 0.859, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1210, Reward=-20.977692, Minibatch Loss= 0.6893, Training Accuracy= 0.805, Testing Acc= 0.543860, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 1020, Reward=-17.9656, Minibatch Loss= 0.6143, Training Accuracy= 0.852, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1220, Reward=-21.05759, Minibatch Loss= 0.6060, Training Accuracy= 0.883, Testing Acc= 0.526316, Max Final Accuracy=  0.789474, Max AUC=  0.871191, Max AP=  0.901812\n",
      "Step 1030, Reward=-11.580846, Minibatch Loss= 0.6935, Training Accuracy= 0.766, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1230, Reward=-15.753513, Minibatch Loss= 0.5464, Training Accuracy= 0.914, Testing Acc= 0.807018, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1040, Reward=-22.283134, Minibatch Loss= 0.6294, Training Accuracy= 0.914, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1240, Reward=-24.165806, Minibatch Loss= 0.6773, Training Accuracy= 0.867, Testing Acc= 0.640351, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1050, Reward=-18.537521, Minibatch Loss= 0.5256, Training Accuracy= 0.906, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1250, Reward=-25.488993, Minibatch Loss= 0.6430, Training Accuracy= 0.883, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1060, Reward=-7.219304, Minibatch Loss= 0.5529, Training Accuracy= 0.859, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1260, Reward=-29.501856, Minibatch Loss= 0.6585, Training Accuracy= 0.898, Testing Acc= 0.491228, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1070, Reward=-15.465951, Minibatch Loss= 0.4587, Training Accuracy= 0.867, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1270, Reward=-22.363522, Minibatch Loss= 0.6367, Training Accuracy= 0.898, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1080, Reward=-12.8983345, Minibatch Loss= 0.5030, Training Accuracy= 0.891, Testing Acc= 0.482456, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1280, Reward=-21.31454, Minibatch Loss= 0.8611, Training Accuracy= 0.742, Testing Acc= 0.745614, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1090, Reward=-7.001469, Minibatch Loss= 0.3944, Training Accuracy= 0.914, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1290, Reward=-25.803713, Minibatch Loss= 0.6395, Training Accuracy= 0.844, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1100, Reward=-18.030481, Minibatch Loss= 0.5134, Training Accuracy= 0.844, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1300, Reward=-16.219513, Minibatch Loss= 0.6050, Training Accuracy= 0.883, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1110, Reward=-27.50566, Minibatch Loss= 0.4718, Training Accuracy= 0.930, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1310, Reward=-18.293001, Minibatch Loss= 0.5734, Training Accuracy= 0.867, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1120, Reward=-21.584105, Minibatch Loss= 0.5336, Training Accuracy= 0.875, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1320, Reward=-22.276745, Minibatch Loss= 0.5471, Training Accuracy= 0.891, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1130, Reward=-26.364193, Minibatch Loss= 0.5446, Training Accuracy= 0.906, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1330, Reward=-24.15866, Minibatch Loss= 0.5093, Training Accuracy= 0.906, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1140, Reward=-21.35625, Minibatch Loss= 0.5348, Training Accuracy= 0.914, Testing Acc= 0.491228, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1340, Reward=-15.379468, Minibatch Loss= 0.5621, Training Accuracy= 0.867, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1150, Reward=-16.246948, Minibatch Loss= 0.8335, Training Accuracy= 0.781, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1350, Reward=-20.001205, Minibatch Loss= 0.5520, Training Accuracy= 0.852, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1160, Reward=-27.625519, Minibatch Loss= 0.6883, Training Accuracy= 0.750, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1360, Reward=-22.462496, Minibatch Loss= 0.5464, Training Accuracy= 0.922, Testing Acc= 0.622807, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1170, Reward=-19.009415, Minibatch Loss= 0.5340, Training Accuracy= 0.898, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1370, Reward=-17.98064, Minibatch Loss= 0.4727, Training Accuracy= 0.906, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1180, Reward=-24.4855, Minibatch Loss= 0.7054, Training Accuracy= 0.789, Testing Acc= 0.482456, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1380, Reward=-18.474232, Minibatch Loss= 0.6712, Training Accuracy= 0.797, Testing Acc= 0.631579, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1190, Reward=-16.376648, Minibatch Loss= 0.5016, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1390, Reward=-20.446136, Minibatch Loss= 0.7080, Training Accuracy= 0.789, Testing Acc= 0.622807, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1200, Reward=-11.069412, Minibatch Loss= 0.5280, Training Accuracy= 0.906, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1400, Reward=-25.543766, Minibatch Loss= 0.6113, Training Accuracy= 0.883, Testing Acc= 0.640351, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1210, Reward=-21.466253, Minibatch Loss= 0.4740, Training Accuracy= 0.938, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1410, Reward=-24.543121, Minibatch Loss= 0.6567, Training Accuracy= 0.852, Testing Acc= 0.552632, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1220, Reward=-11.431791, Minibatch Loss= 0.4556, Training Accuracy= 0.930, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1420, Reward=-23.865658, Minibatch Loss= 0.5251, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1230, Reward=-12.456254, Minibatch Loss= 0.4698, Training Accuracy= 0.891, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1430, Reward=-22.606728, Minibatch Loss= 0.6329, Training Accuracy= 0.789, Testing Acc= 0.640351, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1240, Reward=-24.976192, Minibatch Loss= 0.5757, Training Accuracy= 0.898, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1440, Reward=-18.169369, Minibatch Loss= 0.6084, Training Accuracy= 0.797, Testing Acc= 0.631579, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1250, Reward=-15.77288, Minibatch Loss= 0.6367, Training Accuracy= 0.875, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1450, Reward=-19.102354, Minibatch Loss= 0.4725, Training Accuracy= 0.930, Testing Acc= 0.640351, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1260, Reward=-23.486149, Minibatch Loss= 0.6151, Training Accuracy= 0.898, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1460, Reward=-13.518646, Minibatch Loss= 0.5653, Training Accuracy= 0.867, Testing Acc= 0.666667, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1470, Reward=-16.601671, Minibatch Loss= 0.5755, Training Accuracy= 0.844, Testing Acc= 0.780702, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1270, Reward=-18.982449, Minibatch Loss= 0.5569, Training Accuracy= 0.891, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1480, Reward=-17.823586, Minibatch Loss= 0.7737, Training Accuracy= 0.742, Testing Acc= 0.535088, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1280, Reward=-11.158588, Minibatch Loss= 0.4652, Training Accuracy= 0.961, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1490, Reward=-21.550766, Minibatch Loss= 0.5691, Training Accuracy= 0.914, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1290, Reward=-14.645426, Minibatch Loss= 0.4900, Training Accuracy= 0.883, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1500, Reward=-13.781458, Minibatch Loss= 0.5567, Training Accuracy= 0.867, Testing Acc= 0.552632, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1300, Reward=-18.871176, Minibatch Loss= 0.6123, Training Accuracy= 0.875, Testing Acc= 0.763158, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1510, Reward=-19.170797, Minibatch Loss= 0.4392, Training Accuracy= 0.945, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1310, Reward=-19.909168, Minibatch Loss= 0.4760, Training Accuracy= 0.898, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1520, Reward=-13.749709, Minibatch Loss= 0.5301, Training Accuracy= 0.836, Testing Acc= 0.438596, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1320, Reward=-14.539253, Minibatch Loss= 0.4649, Training Accuracy= 0.891, Testing Acc= 0.710526, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1530, Reward=-29.558435, Minibatch Loss= 0.5491, Training Accuracy= 0.859, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1330, Reward=-12.729208, Minibatch Loss= 0.4102, Training Accuracy= 0.914, Testing Acc= 0.692982, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1540, Reward=-23.887285, Minibatch Loss= 0.5569, Training Accuracy= 0.898, Testing Acc= 0.675439, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1340, Reward=-11.393635, Minibatch Loss= 0.3788, Training Accuracy= 0.914, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1550, Reward=-17.894432, Minibatch Loss= 0.6299, Training Accuracy= 0.797, Testing Acc= 0.640351, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1350, Reward=-9.155696, Minibatch Loss= 0.4718, Training Accuracy= 0.836, Testing Acc= 0.710526, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1560, Reward=-24.955072, Minibatch Loss= 0.5041, Training Accuracy= 0.922, Testing Acc= 0.657895, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1360, Reward=-28.902943, Minibatch Loss= 0.5493, Training Accuracy= 0.867, Testing Acc= 0.719298, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1570, Reward=-17.002213, Minibatch Loss= 0.4567, Training Accuracy= 0.906, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1370, Reward=-31.717194, Minibatch Loss= 0.9159, Training Accuracy= 0.656, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1580, Reward=-12.839067, Minibatch Loss= 0.4174, Training Accuracy= 0.898, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1380, Reward=-24.922087, Minibatch Loss= 0.6525, Training Accuracy= 0.883, Testing Acc= 0.771930, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1590, Reward=-10.56645, Minibatch Loss= 0.4618, Training Accuracy= 0.891, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1390, Reward=-18.532768, Minibatch Loss= 0.4801, Training Accuracy= 0.898, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1600, Reward=-11.990063, Minibatch Loss= 0.3702, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1400, Reward=-17.334814, Minibatch Loss= 0.5064, Training Accuracy= 0.883, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1610, Reward=-18.229742, Minibatch Loss= 0.6371, Training Accuracy= 0.836, Testing Acc= 0.535088, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1410, Reward=-19.79898, Minibatch Loss= 0.4564, Training Accuracy= 0.922, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1620, Reward=-21.191523, Minibatch Loss= 0.6451, Training Accuracy= 0.898, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1420, Reward=-13.717008, Minibatch Loss= 0.4654, Training Accuracy= 0.922, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1630, Reward=-15.500597, Minibatch Loss= 0.7556, Training Accuracy= 0.852, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1430, Reward=-15.225152, Minibatch Loss= 0.5431, Training Accuracy= 0.883, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1640, Reward=-18.399708, Minibatch Loss= 0.7158, Training Accuracy= 0.844, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1440, Reward=-18.709486, Minibatch Loss= 0.4718, Training Accuracy= 0.836, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1650, Reward=-17.090523, Minibatch Loss= 0.6103, Training Accuracy= 0.828, Testing Acc= 0.561404, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1450, Reward=-24.630533, Minibatch Loss= 0.6232, Training Accuracy= 0.898, Testing Acc= 0.789474, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1660, Reward=-21.64434, Minibatch Loss= 0.5663, Training Accuracy= 0.867, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1460, Reward=-27.188198, Minibatch Loss= 0.7222, Training Accuracy= 0.906, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1670, Reward=-15.95857, Minibatch Loss= 0.5223, Training Accuracy= 0.891, Testing Acc= 0.517544, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1470, Reward=-14.427149, Minibatch Loss= 0.6338, Training Accuracy= 0.914, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1680, Reward=-13.04932, Minibatch Loss= 0.4910, Training Accuracy= 0.922, Testing Acc= 0.640351, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1690, Reward=-16.884169, Minibatch Loss= 0.4960, Training Accuracy= 0.883, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1480, Reward=-14.543828, Minibatch Loss= 0.6170, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1700, Reward=-16.00133, Minibatch Loss= 0.3831, Training Accuracy= 0.938, Testing Acc= 0.578947, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1490, Reward=-13.628475, Minibatch Loss= 0.4299, Training Accuracy= 0.945, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1710, Reward=-13.619381, Minibatch Loss= 0.6134, Training Accuracy= 0.797, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1500, Reward=-9.6404505, Minibatch Loss= 0.3882, Training Accuracy= 0.945, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1720, Reward=-28.412453, Minibatch Loss= 0.6424, Training Accuracy= 0.797, Testing Acc= 0.649123, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1510, Reward=-14.857195, Minibatch Loss= 0.3990, Training Accuracy= 0.922, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1730, Reward=-29.336895, Minibatch Loss= 0.6024, Training Accuracy= 0.852, Testing Acc= 0.526316, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1520, Reward=-14.886376, Minibatch Loss= 0.7853, Training Accuracy= 0.828, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1740, Reward=-24.557014, Minibatch Loss= 0.6109, Training Accuracy= 0.852, Testing Acc= 0.508772, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1530, Reward=-27.98792, Minibatch Loss= 0.6270, Training Accuracy= 0.844, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1750, Reward=-19.49806, Minibatch Loss= 0.5204, Training Accuracy= 0.906, Testing Acc= 0.728070, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1540, Reward=-23.930656, Minibatch Loss= 0.4939, Training Accuracy= 0.938, Testing Acc= 0.719298, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1760, Reward=-20.730478, Minibatch Loss= 0.5102, Training Accuracy= 0.898, Testing Acc= 0.622807, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1550, Reward=-22.343443, Minibatch Loss= 0.6423, Training Accuracy= 0.859, Testing Acc= 0.789474, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1770, Reward=-16.560923, Minibatch Loss= 0.4830, Training Accuracy= 0.914, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1560, Reward=-20.44235, Minibatch Loss= 0.5692, Training Accuracy= 0.891, Testing Acc= 0.798246, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1780, Reward=-15.221193, Minibatch Loss= 0.4816, Training Accuracy= 0.891, Testing Acc= 0.631579, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1570, Reward=-17.034657, Minibatch Loss= 0.5329, Training Accuracy= 0.906, Testing Acc= 0.780702, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1790, Reward=-12.737564, Minibatch Loss= 0.4824, Training Accuracy= 0.914, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1580, Reward=-20.165022, Minibatch Loss= 0.6458, Training Accuracy= 0.859, Testing Acc= 0.798246, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1800, Reward=-13.253701, Minibatch Loss= 0.5138, Training Accuracy= 0.883, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1590, Reward=-15.195537, Minibatch Loss= 0.5251, Training Accuracy= 0.891, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1810, Reward=-17.84885, Minibatch Loss= 0.5198, Training Accuracy= 0.867, Testing Acc= 0.631579, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1600, Reward=-19.949484, Minibatch Loss= 0.5244, Training Accuracy= 0.922, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1820, Reward=-24.095577, Minibatch Loss= 0.4453, Training Accuracy= 0.883, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1610, Reward=-28.524052, Minibatch Loss= 0.5357, Training Accuracy= 0.898, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1830, Reward=-16.293297, Minibatch Loss= 0.4767, Training Accuracy= 0.906, Testing Acc= 0.701754, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1620, Reward=-34.84548, Minibatch Loss= 0.7414, Training Accuracy= 0.820, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1840, Reward=-23.323467, Minibatch Loss= 0.5980, Training Accuracy= 0.875, Testing Acc= 0.631579, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1630, Reward=-32.135254, Minibatch Loss= 0.6916, Training Accuracy= 0.836, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1850, Reward=-20.81251, Minibatch Loss= 0.5110, Training Accuracy= 0.922, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1640, Reward=-24.23185, Minibatch Loss= 0.6082, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1860, Reward=-18.173878, Minibatch Loss= 0.6085, Training Accuracy= 0.883, Testing Acc= 0.552632, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1650, Reward=-22.9348, Minibatch Loss= 0.6284, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1870, Reward=-22.391766, Minibatch Loss= 0.4953, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1660, Reward=-26.134735, Minibatch Loss= 0.7360, Training Accuracy= 0.852, Testing Acc= 0.684211, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1880, Reward=-14.074545, Minibatch Loss= 0.5185, Training Accuracy= 0.875, Testing Acc= 0.640351, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1670, Reward=-29.63913, Minibatch Loss= 0.6978, Training Accuracy= 0.844, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1890, Reward=-17.135242, Minibatch Loss= 0.3830, Training Accuracy= 0.938, Testing Acc= 0.640351, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1680, Reward=-25.890083, Minibatch Loss= 1.0018, Training Accuracy= 0.820, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1900, Reward=-17.507196, Minibatch Loss= 0.6122, Training Accuracy= 0.883, Testing Acc= 0.640351, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1690, Reward=-36.372612, Minibatch Loss= 0.8296, Training Accuracy= 0.859, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1910, Reward=-22.985247, Minibatch Loss= 0.5209, Training Accuracy= 0.852, Testing Acc= 0.631579, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1700, Reward=-19.246538, Minibatch Loss= 0.9173, Training Accuracy= 0.797, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1920, Reward=-19.605011, Minibatch Loss= 0.3961, Training Accuracy= 0.898, Testing Acc= 0.631579, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1710, Reward=-24.93613, Minibatch Loss= 0.8160, Training Accuracy= 0.867, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1930, Reward=-8.366607, Minibatch Loss= 0.4938, Training Accuracy= 0.906, Testing Acc= 0.798246, Max Final Accuracy=  0.807018, Max AUC=  0.865035, Max AP=  0.895294\n",
      "Step 1720, Reward=-28.919113, Minibatch Loss= 0.8478, Training Accuracy= 0.836, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1940, Reward=-26.419865, Minibatch Loss= 0.5428, Training Accuracy= 0.922, Testing Acc= 0.824561, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1730, Reward=-25.631107, Minibatch Loss= 0.7582, Training Accuracy= 0.844, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1950, Reward=-14.604168, Minibatch Loss= 0.5041, Training Accuracy= 0.891, Testing Acc= 0.807018, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1740, Reward=-28.698914, Minibatch Loss= 0.6978, Training Accuracy= 0.898, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1960, Reward=-13.660193, Minibatch Loss= 0.4393, Training Accuracy= 0.898, Testing Acc= 0.666667, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1750, Reward=-20.948162, Minibatch Loss= 0.7233, Training Accuracy= 0.805, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1970, Reward=-6.673615, Minibatch Loss= 0.4993, Training Accuracy= 0.883, Testing Acc= 0.596491, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1760, Reward=-21.737495, Minibatch Loss= 0.5726, Training Accuracy= 0.898, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1980, Reward=-15.863935, Minibatch Loss= 0.9188, Training Accuracy= 0.781, Testing Acc= 0.587719, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1770, Reward=-26.274334, Minibatch Loss= 0.6443, Training Accuracy= 0.867, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 1990, Reward=-30.230991, Minibatch Loss= 0.6209, Training Accuracy= 0.867, Testing Acc= 0.824561, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1780, Reward=-19.756458, Minibatch Loss= 0.7094, Training Accuracy= 0.844, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2000, Reward=-9.533156, Minibatch Loss= 0.6025, Training Accuracy= 0.906, Testing Acc= 0.666667, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1790, Reward=-28.142942, Minibatch Loss= 0.5726, Training Accuracy= 0.906, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2010, Reward=-6.5798554, Minibatch Loss= 0.6800, Training Accuracy= 0.797, Testing Acc= 0.640351, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1800, Reward=-11.522191, Minibatch Loss= 0.5750, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2020, Reward=-15.874059, Minibatch Loss= 0.4302, Training Accuracy= 0.914, Testing Acc= 0.614035, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1810, Reward=-18.733294, Minibatch Loss= 0.6783, Training Accuracy= 0.820, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2030, Reward=-12.862488, Minibatch Loss= 0.4363, Training Accuracy= 0.891, Testing Acc= 0.640351, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1820, Reward=-24.173098, Minibatch Loss= 0.7359, Training Accuracy= 0.875, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2040, Reward=-12.251229, Minibatch Loss= 0.3889, Training Accuracy= 0.922, Testing Acc= 0.605263, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1830, Reward=-25.755642, Minibatch Loss= 0.6701, Training Accuracy= 0.867, Testing Acc= 0.780702, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2050, Reward=-18.57102, Minibatch Loss= 0.4618, Training Accuracy= 0.859, Testing Acc= 0.578947, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1840, Reward=-18.678236, Minibatch Loss= 0.6371, Training Accuracy= 0.836, Testing Acc= 0.763158, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2060, Reward=-22.443714, Minibatch Loss= 0.5365, Training Accuracy= 0.859, Testing Acc= 0.614035, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1850, Reward=-12.310583, Minibatch Loss= 0.6169, Training Accuracy= 0.859, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2070, Reward=-25.348982, Minibatch Loss= 0.5131, Training Accuracy= 0.922, Testing Acc= 0.649123, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1860, Reward=-19.707027, Minibatch Loss= 0.8943, Training Accuracy= 0.867, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2080, Reward=-23.327831, Minibatch Loss= 0.4936, Training Accuracy= 0.875, Testing Acc= 0.710526, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1870, Reward=-25.816101, Minibatch Loss= 0.5900, Training Accuracy= 0.891, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2090, Reward=-17.556591, Minibatch Loss= 0.4502, Training Accuracy= 0.875, Testing Acc= 0.622807, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1880, Reward=-19.881254, Minibatch Loss= 0.6138, Training Accuracy= 0.859, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2100, Reward=-18.019463, Minibatch Loss= 0.3813, Training Accuracy= 0.938, Testing Acc= 0.631579, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1890, Reward=-18.00058, Minibatch Loss= 0.5694, Training Accuracy= 0.883, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2110, Reward=-12.486335, Minibatch Loss= 0.3845, Training Accuracy= 0.938, Testing Acc= 0.640351, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1900, Reward=-19.711182, Minibatch Loss= 0.6503, Training Accuracy= 0.852, Testing Acc= 0.798246, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2120, Reward=-10.859665, Minibatch Loss= 0.3997, Training Accuracy= 0.922, Testing Acc= 0.736842, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1910, Reward=-20.754934, Minibatch Loss= 0.5557, Training Accuracy= 0.914, Testing Acc= 0.798246, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2130, Reward=-20.452564, Minibatch Loss= 0.5008, Training Accuracy= 0.898, Testing Acc= 0.552632, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1920, Reward=-15.90477, Minibatch Loss= 0.5850, Training Accuracy= 0.898, Testing Acc= 0.789474, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2140, Reward=-25.730392, Minibatch Loss= 0.5985, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1930, Reward=-11.609915, Minibatch Loss= 0.6910, Training Accuracy= 0.898, Testing Acc= 0.763158, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2150, Reward=-15.55696, Minibatch Loss= 0.6556, Training Accuracy= 0.898, Testing Acc= 0.596491, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1940, Reward=-15.541546, Minibatch Loss= 0.5349, Training Accuracy= 0.883, Testing Acc= 0.675439, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2160, Reward=-12.8588085, Minibatch Loss= 0.4656, Training Accuracy= 0.922, Testing Acc= 0.640351, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1950, Reward=-19.761675, Minibatch Loss= 0.5118, Training Accuracy= 0.898, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2170, Reward=-19.74847, Minibatch Loss= 0.4105, Training Accuracy= 0.945, Testing Acc= 0.631579, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1960, Reward=-13.968634, Minibatch Loss= 0.4838, Training Accuracy= 0.891, Testing Acc= 0.692982, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2180, Reward=-7.520888, Minibatch Loss= 0.3524, Training Accuracy= 0.938, Testing Acc= 0.561404, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1970, Reward=-14.344537, Minibatch Loss= 0.5516, Training Accuracy= 0.820, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2190, Reward=-25.392395, Minibatch Loss= 0.9796, Training Accuracy= 0.750, Testing Acc= 0.780702, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1980, Reward=-20.497425, Minibatch Loss= 0.5116, Training Accuracy= 0.945, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2200, Reward=-20.071419, Minibatch Loss= 0.5856, Training Accuracy= 0.914, Testing Acc= 0.640351, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 1990, Reward=-20.853746, Minibatch Loss= 0.5429, Training Accuracy= 0.875, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2210, Reward=-21.117615, Minibatch Loss= 0.7073, Training Accuracy= 0.836, Testing Acc= 0.614035, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2000, Reward=-20.056612, Minibatch Loss= 0.5658, Training Accuracy= 0.875, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2220, Reward=-22.400215, Minibatch Loss= 0.6465, Training Accuracy= 0.859, Testing Acc= 0.649123, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2010, Reward=-14.838833, Minibatch Loss= 0.4552, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2230, Reward=-16.55909, Minibatch Loss= 0.4842, Training Accuracy= 0.914, Testing Acc= 0.631579, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2020, Reward=-15.669548, Minibatch Loss= 0.5324, Training Accuracy= 0.852, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2240, Reward=-11.69385, Minibatch Loss= 0.5410, Training Accuracy= 0.891, Testing Acc= 0.657895, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2030, Reward=-14.675367, Minibatch Loss= 0.5064, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2250, Reward=-21.232267, Minibatch Loss= 0.7251, Training Accuracy= 0.797, Testing Acc= 0.640351, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2040, Reward=-18.799053, Minibatch Loss= 0.5143, Training Accuracy= 0.883, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2260, Reward=-21.37252, Minibatch Loss= 0.6859, Training Accuracy= 0.828, Testing Acc= 0.789474, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2050, Reward=-9.362841, Minibatch Loss= 0.4572, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2270, Reward=-28.97226, Minibatch Loss= 0.7014, Training Accuracy= 0.875, Testing Acc= 0.710526, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2060, Reward=-14.412403, Minibatch Loss= 0.5106, Training Accuracy= 0.852, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2280, Reward=-27.609392, Minibatch Loss= 0.6760, Training Accuracy= 0.898, Testing Acc= 0.692982, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2070, Reward=-23.811298, Minibatch Loss= 0.4647, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2290, Reward=-21.094498, Minibatch Loss= 0.7292, Training Accuracy= 0.836, Testing Acc= 0.631579, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2080, Reward=-26.958183, Minibatch Loss= 0.5041, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2300, Reward=-17.34876, Minibatch Loss= 0.6770, Training Accuracy= 0.898, Testing Acc= 0.640351, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2090, Reward=-17.90713, Minibatch Loss= 0.4057, Training Accuracy= 0.938, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2310, Reward=-24.692358, Minibatch Loss= 0.7056, Training Accuracy= 0.875, Testing Acc= 0.640351, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2100, Reward=-13.843579, Minibatch Loss= 0.4142, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2320, Reward=-28.985117, Minibatch Loss= 0.7007, Training Accuracy= 0.875, Testing Acc= 0.605263, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2110, Reward=-12.917706, Minibatch Loss= 0.4253, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2330, Reward=-22.57486, Minibatch Loss= 0.7463, Training Accuracy= 0.789, Testing Acc= 0.596491, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2120, Reward=-14.158054, Minibatch Loss= 0.4391, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2340, Reward=-30.098227, Minibatch Loss= 0.7283, Training Accuracy= 0.852, Testing Acc= 0.736842, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2130, Reward=-17.031519, Minibatch Loss= 0.5486, Training Accuracy= 0.883, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2350, Reward=-20.98839, Minibatch Loss= 0.8272, Training Accuracy= 0.844, Testing Acc= 0.631579, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2140, Reward=-21.590195, Minibatch Loss= 0.4385, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2360, Reward=-33.36461, Minibatch Loss= 0.7697, Training Accuracy= 0.938, Testing Acc= 0.807018, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2150, Reward=-14.51264, Minibatch Loss= 0.4987, Training Accuracy= 0.883, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2370, Reward=-21.355993, Minibatch Loss= 0.7284, Training Accuracy= 0.883, Testing Acc= 0.763158, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2160, Reward=-18.64191, Minibatch Loss= 0.4881, Training Accuracy= 0.891, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2380, Reward=-29.08853, Minibatch Loss= 0.6676, Training Accuracy= 0.891, Testing Acc= 0.578947, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2170, Reward=-24.417528, Minibatch Loss= 0.4805, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2390, Reward=-19.727484, Minibatch Loss= 0.6734, Training Accuracy= 0.914, Testing Acc= 0.517544, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2180, Reward=-13.3889675, Minibatch Loss= 0.5155, Training Accuracy= 0.852, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2400, Reward=-24.38332, Minibatch Loss= 0.7009, Training Accuracy= 0.852, Testing Acc= 0.692982, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2190, Reward=-15.38672, Minibatch Loss= 0.4911, Training Accuracy= 0.875, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2410, Reward=-26.083914, Minibatch Loss= 0.6062, Training Accuracy= 0.898, Testing Acc= 0.543860, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2200, Reward=-24.161064, Minibatch Loss= 0.6245, Training Accuracy= 0.883, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2420, Reward=-19.1758, Minibatch Loss= 1.0494, Training Accuracy= 0.719, Testing Acc= 0.587719, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2210, Reward=-18.680979, Minibatch Loss= 0.5297, Training Accuracy= 0.891, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2430, Reward=-24.738712, Minibatch Loss= 0.9597, Training Accuracy= 0.742, Testing Acc= 0.491228, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2220, Reward=-15.044115, Minibatch Loss= 0.6246, Training Accuracy= 0.828, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2440, Reward=-32.159256, Minibatch Loss= 0.7423, Training Accuracy= 0.898, Testing Acc= 0.596491, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2230, Reward=-24.536386, Minibatch Loss= 0.6090, Training Accuracy= 0.789, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2450, Reward=-26.416058, Minibatch Loss= 0.6823, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2240, Reward=-25.083582, Minibatch Loss= 0.6087, Training Accuracy= 0.852, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2460, Reward=-18.52279, Minibatch Loss= 0.6754, Training Accuracy= 0.922, Testing Acc= 0.570175, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2250, Reward=-20.29004, Minibatch Loss= 0.5922, Training Accuracy= 0.812, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2470, Reward=-15.78155, Minibatch Loss= 0.6396, Training Accuracy= 0.859, Testing Acc= 0.614035, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2260, Reward=-7.4234138, Minibatch Loss= 0.5026, Training Accuracy= 0.867, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2480, Reward=-20.047935, Minibatch Loss= 0.5504, Training Accuracy= 0.875, Testing Acc= 0.578947, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2270, Reward=-11.642544, Minibatch Loss= 0.4376, Training Accuracy= 0.875, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2490, Reward=-22.509682, Minibatch Loss= 0.5610, Training Accuracy= 0.883, Testing Acc= 0.570175, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2280, Reward=-18.608307, Minibatch Loss= 0.4740, Training Accuracy= 0.891, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2500, Reward=-16.212152, Minibatch Loss= 0.4626, Training Accuracy= 0.945, Testing Acc= 0.517544, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2290, Reward=-22.825266, Minibatch Loss= 0.7991, Training Accuracy= 0.773, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2510, Reward=-9.304197, Minibatch Loss= 0.5228, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2300, Reward=-31.619347, Minibatch Loss= 0.7843, Training Accuracy= 0.867, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2520, Reward=-12.94819, Minibatch Loss= 0.5804, Training Accuracy= 0.891, Testing Acc= 0.500000, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2310, Reward=-25.622274, Minibatch Loss= 0.7669, Training Accuracy= 0.867, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2530, Reward=-21.726002, Minibatch Loss= 0.6421, Training Accuracy= 0.883, Testing Acc= 0.508772, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2320, Reward=-18.998644, Minibatch Loss= 0.6371, Training Accuracy= 0.883, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2540, Reward=-14.974894, Minibatch Loss= 0.7718, Training Accuracy= 0.867, Testing Acc= 0.482456, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2550, Reward=-28.21294, Minibatch Loss= 0.7202, Training Accuracy= 0.859, Testing Acc= 0.535088, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2330, Reward=-12.280941, Minibatch Loss= 0.5629, Training Accuracy= 0.867, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2560, Reward=-28.906664, Minibatch Loss= 0.7622, Training Accuracy= 0.828, Testing Acc= 0.605263, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2340, Reward=-20.29473, Minibatch Loss= 0.6500, Training Accuracy= 0.852, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2570, Reward=-20.623201, Minibatch Loss= 0.8394, Training Accuracy= 0.867, Testing Acc= 0.570175, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2350, Reward=-30.96273, Minibatch Loss= 0.7188, Training Accuracy= 0.883, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2580, Reward=-26.844427, Minibatch Loss= 0.7605, Training Accuracy= 0.883, Testing Acc= 0.605263, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2360, Reward=-25.222103, Minibatch Loss= 0.6108, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2590, Reward=-24.013472, Minibatch Loss= 0.7113, Training Accuracy= 0.852, Testing Acc= 0.543860, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2370, Reward=-11.547613, Minibatch Loss= 0.5466, Training Accuracy= 0.938, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2600, Reward=-18.403463, Minibatch Loss= 0.6382, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2380, Reward=-11.135328, Minibatch Loss= 0.5179, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2610, Reward=-18.448355, Minibatch Loss= 0.5674, Training Accuracy= 0.906, Testing Acc= 0.464912, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2390, Reward=-15.637207, Minibatch Loss= 0.5406, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2620, Reward=-16.43986, Minibatch Loss= 0.5242, Training Accuracy= 0.930, Testing Acc= 0.675439, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2400, Reward=-18.148546, Minibatch Loss= 0.6654, Training Accuracy= 0.852, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2630, Reward=-17.45403, Minibatch Loss= 0.4944, Training Accuracy= 0.930, Testing Acc= 0.526316, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2410, Reward=-19.285557, Minibatch Loss= 0.4898, Training Accuracy= 0.914, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2640, Reward=-9.254852, Minibatch Loss= 0.4749, Training Accuracy= 0.922, Testing Acc= 0.508772, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2420, Reward=-13.051456, Minibatch Loss= 0.4253, Training Accuracy= 0.953, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2650, Reward=-9.273217, Minibatch Loss= 0.3807, Training Accuracy= 0.938, Testing Acc= 0.491228, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2430, Reward=-11.20682, Minibatch Loss= 0.4008, Training Accuracy= 0.914, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2660, Reward=-15.663577, Minibatch Loss= 0.4768, Training Accuracy= 0.922, Testing Acc= 0.500000, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2440, Reward=-10.434304, Minibatch Loss= 0.3500, Training Accuracy= 0.938, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2670, Reward=-22.106533, Minibatch Loss= 0.5364, Training Accuracy= 0.898, Testing Acc= 0.657895, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2450, Reward=-11.871772, Minibatch Loss= 0.4885, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2680, Reward=-21.000984, Minibatch Loss= 0.9346, Training Accuracy= 0.828, Testing Acc= 0.701754, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2460, Reward=-14.960763, Minibatch Loss= 0.4523, Training Accuracy= 0.867, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2690, Reward=-21.748653, Minibatch Loss= 0.7761, Training Accuracy= 0.891, Testing Acc= 0.719298, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2470, Reward=-6.3056445, Minibatch Loss= 0.3973, Training Accuracy= 0.898, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2700, Reward=-17.62603, Minibatch Loss= 0.7465, Training Accuracy= 0.828, Testing Acc= 0.710526, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2480, Reward=-14.315318, Minibatch Loss= 0.4973, Training Accuracy= 0.836, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2710, Reward=-20.519974, Minibatch Loss= 0.5846, Training Accuracy= 0.875, Testing Acc= 0.640351, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2490, Reward=-17.383018, Minibatch Loss= 0.4275, Training Accuracy= 0.922, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2720, Reward=-16.95934, Minibatch Loss= 0.6673, Training Accuracy= 0.836, Testing Acc= 0.666667, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2500, Reward=-12.959169, Minibatch Loss= 0.3785, Training Accuracy= 0.906, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2730, Reward=-16.099138, Minibatch Loss= 0.6034, Training Accuracy= 0.875, Testing Acc= 0.692982, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2510, Reward=-10.909901, Minibatch Loss= 0.4152, Training Accuracy= 0.883, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2740, Reward=-13.526303, Minibatch Loss= 0.6341, Training Accuracy= 0.859, Testing Acc= 0.596491, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2520, Reward=-9.76231, Minibatch Loss= 0.4391, Training Accuracy= 0.883, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2750, Reward=-18.002865, Minibatch Loss= 0.5040, Training Accuracy= 0.898, Testing Acc= 0.552632, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2530, Reward=-13.016495, Minibatch Loss= 0.3928, Training Accuracy= 0.922, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2760, Reward=-14.191243, Minibatch Loss= 0.4725, Training Accuracy= 0.922, Testing Acc= 0.605263, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2540, Reward=-6.4802475, Minibatch Loss= 0.3872, Training Accuracy= 0.875, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2770, Reward=-12.652401, Minibatch Loss= 0.3875, Training Accuracy= 0.922, Testing Acc= 0.596491, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2550, Reward=-16.199879, Minibatch Loss= 0.3232, Training Accuracy= 0.938, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2780, Reward=-5.7681904, Minibatch Loss= 0.5683, Training Accuracy= 0.828, Testing Acc= 0.491228, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2560, Reward=-19.530304, Minibatch Loss= 0.6672, Training Accuracy= 0.805, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2790, Reward=-19.873425, Minibatch Loss= 0.5357, Training Accuracy= 0.883, Testing Acc= 0.473684, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2570, Reward=-25.12792, Minibatch Loss= 0.6646, Training Accuracy= 0.867, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2800, Reward=-12.742535, Minibatch Loss= 0.5276, Training Accuracy= 0.906, Testing Acc= 0.473684, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2580, Reward=-20.849491, Minibatch Loss= 0.6061, Training Accuracy= 0.867, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2810, Reward=-12.257125, Minibatch Loss= 0.4579, Training Accuracy= 0.898, Testing Acc= 0.491228, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2590, Reward=-15.868868, Minibatch Loss= 0.5413, Training Accuracy= 0.852, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2820, Reward=-16.921438, Minibatch Loss= 0.6634, Training Accuracy= 0.852, Testing Acc= 0.570175, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2600, Reward=-16.28468, Minibatch Loss= 0.5333, Training Accuracy= 0.867, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2830, Reward=-28.29571, Minibatch Loss= 0.4460, Training Accuracy= 0.859, Testing Acc= 0.473684, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2610, Reward=-12.258577, Minibatch Loss= 0.4439, Training Accuracy= 0.906, Testing Acc= 0.701754, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2840, Reward=-19.531311, Minibatch Loss= 0.4353, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2620, Reward=-13.002502, Minibatch Loss= 0.4137, Training Accuracy= 0.922, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2850, Reward=-17.761482, Minibatch Loss= 0.3793, Training Accuracy= 0.922, Testing Acc= 0.508772, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2630, Reward=-7.3450117, Minibatch Loss= 0.4014, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2860, Reward=-16.904581, Minibatch Loss= 0.5868, Training Accuracy= 0.922, Testing Acc= 0.473684, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2640, Reward=-11.48574, Minibatch Loss= 0.4416, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2870, Reward=-18.53045, Minibatch Loss= 0.5978, Training Accuracy= 0.891, Testing Acc= 0.482456, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2650, Reward=-24.161411, Minibatch Loss= 0.4102, Training Accuracy= 0.891, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2880, Reward=-9.080623, Minibatch Loss= 0.5099, Training Accuracy= 0.906, Testing Acc= 0.491228, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2660, Reward=-10.228908, Minibatch Loss= 0.3730, Training Accuracy= 0.938, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2890, Reward=-7.54578, Minibatch Loss= 0.4036, Training Accuracy= 0.906, Testing Acc= 0.491228, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2670, Reward=-9.025478, Minibatch Loss= 0.7663, Training Accuracy= 0.797, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2900, Reward=-3.3233464, Minibatch Loss= 0.3761, Training Accuracy= 0.953, Testing Acc= 0.491228, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2680, Reward=-31.81404, Minibatch Loss= 0.7640, Training Accuracy= 0.867, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2910, Reward=-14.68766, Minibatch Loss= 0.5357, Training Accuracy= 0.820, Testing Acc= 0.535088, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2690, Reward=-17.846489, Minibatch Loss= 0.8148, Training Accuracy= 0.828, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2920, Reward=-26.402824, Minibatch Loss= 0.7599, Training Accuracy= 0.875, Testing Acc= 0.508772, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2700, Reward=-14.918478, Minibatch Loss= 0.6117, Training Accuracy= 0.867, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2930, Reward=-27.538433, Minibatch Loss= 0.5461, Training Accuracy= 0.945, Testing Acc= 0.614035, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2940, Reward=-11.511262, Minibatch Loss= 0.4926, Training Accuracy= 0.922, Testing Acc= 0.508772, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2710, Reward=-20.836191, Minibatch Loss= 0.4537, Training Accuracy= 0.938, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2950, Reward=-15.863076, Minibatch Loss= 0.4630, Training Accuracy= 0.898, Testing Acc= 0.482456, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2720, Reward=-16.390226, Minibatch Loss= 0.4129, Training Accuracy= 0.938, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2960, Reward=-11.53143, Minibatch Loss= 0.3659, Training Accuracy= 0.930, Testing Acc= 0.482456, Max Final Accuracy=  0.824561, Max AUC=  0.877039, Max AP=  0.897632\n",
      "Step 2730, Reward=-14.130156, Minibatch Loss= 0.7416, Training Accuracy= 0.781, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 2950, Reward=-10.15091, Minibatch Loss= 0.5076, Training Accuracy= 0.938, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 170, Reward=-28.785582, Minibatch Loss= 0.6220, Training Accuracy= 0.844, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2960, Reward=-18.313446, Minibatch Loss= 0.6053, Training Accuracy= 0.859, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 180, Reward=-24.921997, Minibatch Loss= 0.6221, Training Accuracy= 0.828, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2970, Reward=-16.70177, Minibatch Loss= 0.4512, Training Accuracy= 0.930, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 190, Reward=-31.301565, Minibatch Loss= 0.7203, Training Accuracy= 0.797, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2980, Reward=-12.3342085, Minibatch Loss= 0.4580, Training Accuracy= 0.914, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 200, Reward=-20.118681, Minibatch Loss= 0.6263, Training Accuracy= 0.844, Testing Acc= 0.728070, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2990, Reward=-18.265245, Minibatch Loss= 0.4466, Training Accuracy= 0.891, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.867498, Max AP=  0.892875\n",
      "Step 210, Reward=-25.000713, Minibatch Loss= 0.6293, Training Accuracy= 0.852, Testing Acc= 0.684211, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Optimization Finished!\n",
      "Testing Accuracy: 0.81578946\n",
      "Step 220, Reward=-18.635473, Minibatch Loss= 0.6683, Training Accuracy= 0.758, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 230, Reward=-25.672676, Minibatch Loss= 0.6752, Training Accuracy= 0.844, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 240, Reward=-20.558434, Minibatch Loss= 0.7842, Training Accuracy= 0.773, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 10, Reward=-46.01289, Minibatch Loss= 1.0664, Training Accuracy= 0.500, Testing Acc= 0.500000, Max Final Accuracy=  0.500000, Max AUC=  0.234534, Max AP=  0.378459\n",
      "Step 250, Reward=-17.998842, Minibatch Loss= 0.6526, Training Accuracy= 0.883, Testing Acc= 0.701754, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 20, Reward=-64.049385, Minibatch Loss= 0.7520, Training Accuracy= 0.688, Testing Acc= 0.614035, Max Final Accuracy=  0.614035, Max AUC=  0.749769, Max AP=  0.753801\n",
      "Step 260, Reward=-25.27428, Minibatch Loss= 0.6973, Training Accuracy= 0.812, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 30, Reward=-51.9153, Minibatch Loss= 0.6765, Training Accuracy= 0.789, Testing Acc= 0.631579, Max Final Accuracy=  0.631579, Max AUC=  0.813789, Max AP=  0.806568\n",
      "Step 270, Reward=-31.344694, Minibatch Loss= 0.6931, Training Accuracy= 0.812, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 40, Reward=-44.088005, Minibatch Loss= 0.6404, Training Accuracy= 0.773, Testing Acc= 0.701754, Max Final Accuracy=  0.701754, Max AUC=  0.850108, Max AP=  0.882193\n",
      "Step 280, Reward=-34.090603, Minibatch Loss= 0.7092, Training Accuracy= 0.812, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 290, Reward=-27.632473, Minibatch Loss= 0.6535, Training Accuracy= 0.898, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 50, Reward=-44.898872, Minibatch Loss= 0.6921, Training Accuracy= 0.742, Testing Acc= 0.675439, Max Final Accuracy=  0.701754, Max AUC=  0.850108, Max AP=  0.882193\n",
      "Step 300, Reward=-17.377886, Minibatch Loss= 0.6151, Training Accuracy= 0.883, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 60, Reward=-37.296898, Minibatch Loss= 0.6541, Training Accuracy= 0.820, Testing Acc= 0.543860, Max Final Accuracy=  0.701754, Max AUC=  0.850108, Max AP=  0.882193\n",
      "Step 310, Reward=-25.320381, Minibatch Loss= 0.5803, Training Accuracy= 0.875, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 70, Reward=-37.974644, Minibatch Loss= 0.6014, Training Accuracy= 0.820, Testing Acc= 0.500000, Max Final Accuracy=  0.701754, Max AUC=  0.850108, Max AP=  0.882193\n",
      "Step 320, Reward=-31.813139, Minibatch Loss= 0.6903, Training Accuracy= 0.867, Testing Acc= 0.473684, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 80, Reward=-35.320488, Minibatch Loss= 0.6368, Training Accuracy= 0.773, Testing Acc= 0.500000, Max Final Accuracy=  0.701754, Max AUC=  0.850108, Max AP=  0.882193\n",
      "Step 330, Reward=-22.592701, Minibatch Loss= 0.6193, Training Accuracy= 0.859, Testing Acc= 0.491228, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 90, Reward=-28.945034, Minibatch Loss= 0.6320, Training Accuracy= 0.820, Testing Acc= 0.675439, Max Final Accuracy=  0.701754, Max AUC=  0.850108, Max AP=  0.882193\n",
      "Step 340, Reward=-20.096497, Minibatch Loss= 0.6333, Training Accuracy= 0.852, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 100, Reward=-26.992664, Minibatch Loss= 0.6916, Training Accuracy= 0.750, Testing Acc= 0.754386, Max Final Accuracy=  0.754386, Max AUC=  0.862881, Max AP=  0.890319\n",
      "Step 350, Reward=-20.783947, Minibatch Loss= 0.5580, Training Accuracy= 0.836, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 110, Reward=-23.183617, Minibatch Loss= 0.5698, Training Accuracy= 0.836, Testing Acc= 0.657895, Max Final Accuracy=  0.754386, Max AUC=  0.862881, Max AP=  0.890319\n",
      "Step 360, Reward=-27.7213, Minibatch Loss= 0.8521, Training Accuracy= 0.742, Testing Acc= 0.692982, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 120, Reward=-30.655815, Minibatch Loss= 0.8786, Training Accuracy= 0.711, Testing Acc= 0.719298, Max Final Accuracy=  0.754386, Max AUC=  0.862881, Max AP=  0.890319\n",
      "Step 370, Reward=-28.029305, Minibatch Loss= 0.6994, Training Accuracy= 0.867, Testing Acc= 0.684211, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 130, Reward=-26.617619, Minibatch Loss= 0.7716, Training Accuracy= 0.773, Testing Acc= 0.807018, Max Final Accuracy=  0.807018, Max AUC=  0.885965, Max AP=  0.906616\n",
      "Step 380, Reward=-26.795185, Minibatch Loss= 0.6487, Training Accuracy= 0.789, Testing Acc= 0.701754, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 140, Reward=-29.678, Minibatch Loss= 0.6206, Training Accuracy= 0.820, Testing Acc= 0.605263, Max Final Accuracy=  0.807018, Max AUC=  0.885965, Max AP=  0.906616\n",
      "Step 390, Reward=-21.680082, Minibatch Loss= 0.5710, Training Accuracy= 0.891, Testing Acc= 0.701754, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 150, Reward=-30.907265, Minibatch Loss= 0.6742, Training Accuracy= 0.820, Testing Acc= 0.596491, Max Final Accuracy=  0.807018, Max AUC=  0.885965, Max AP=  0.906616\n",
      "Step 400, Reward=-15.306121, Minibatch Loss= 0.5509, Training Accuracy= 0.859, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 160, Reward=-25.727777, Minibatch Loss= 0.5514, Training Accuracy= 0.891, Testing Acc= 0.649123, Max Final Accuracy=  0.807018, Max AUC=  0.885965, Max AP=  0.906616\n",
      "Step 410, Reward=-25.381977, Minibatch Loss= 0.6587, Training Accuracy= 0.812, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 170, Reward=-31.805246, Minibatch Loss= 0.6689, Training Accuracy= 0.836, Testing Acc= 0.543860, Max Final Accuracy=  0.807018, Max AUC=  0.885965, Max AP=  0.906616\n",
      "Step 420, Reward=-12.869104, Minibatch Loss= 0.5421, Training Accuracy= 0.859, Testing Acc= 0.482456, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 180, Reward=-27.978521, Minibatch Loss= 0.6925, Training Accuracy= 0.859, Testing Acc= 0.657895, Max Final Accuracy=  0.807018, Max AUC=  0.885965, Max AP=  0.906616\n",
      "Step 430, Reward=-18.857122, Minibatch Loss= 0.5687, Training Accuracy= 0.836, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 190, Reward=-25.281452, Minibatch Loss= 0.6048, Training Accuracy= 0.836, Testing Acc= 0.657895, Max Final Accuracy=  0.807018, Max AUC=  0.885965, Max AP=  0.906616\n",
      "Step 440, Reward=-26.756413, Minibatch Loss= 0.6454, Training Accuracy= 0.805, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 200, Reward=-23.661957, Minibatch Loss= 0.4929, Training Accuracy= 0.906, Testing Acc= 0.692982, Max Final Accuracy=  0.807018, Max AUC=  0.885965, Max AP=  0.906616\n",
      "Step 450, Reward=-23.678902, Minibatch Loss= 0.6174, Training Accuracy= 0.875, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 210, Reward=-23.24496, Minibatch Loss= 0.5560, Training Accuracy= 0.891, Testing Acc= 0.587719, Max Final Accuracy=  0.807018, Max AUC=  0.885965, Max AP=  0.906616\n",
      "Step 460, Reward=-26.011599, Minibatch Loss= 0.7567, Training Accuracy= 0.750, Testing Acc= 0.815789, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 470, Reward=-23.775755, Minibatch Loss= 0.6190, Training Accuracy= 0.867, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 220, Reward=-26.987309, Minibatch Loss= 0.6707, Training Accuracy= 0.844, Testing Acc= 0.570175, Max Final Accuracy=  0.807018, Max AUC=  0.885965, Max AP=  0.906616\n",
      "Step 480, Reward=-19.130114, Minibatch Loss= 0.6432, Training Accuracy= 0.836, Testing Acc= 0.701754, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 230, Reward=-16.900417, Minibatch Loss= 0.5791, Training Accuracy= 0.859, Testing Acc= 0.614035, Max Final Accuracy=  0.807018, Max AUC=  0.885965, Max AP=  0.906616\n",
      "Step 490, Reward=-21.939281, Minibatch Loss= 0.6029, Training Accuracy= 0.836, Testing Acc= 0.719298, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 240, Reward=-22.611486, Minibatch Loss= 0.6020, Training Accuracy= 0.805, Testing Acc= 0.622807, Max Final Accuracy=  0.807018, Max AUC=  0.885965, Max AP=  0.906616\n",
      "Step 500, Reward=-26.398848, Minibatch Loss= 0.5560, Training Accuracy= 0.914, Testing Acc= 0.745614, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 250, Reward=-28.388836, Minibatch Loss= 0.6670, Training Accuracy= 0.836, Testing Acc= 0.815789, Max Final Accuracy=  0.815789, Max AUC=  0.849338, Max AP=  0.886798\n",
      "Step 510, Reward=-19.842718, Minibatch Loss= 0.9873, Training Accuracy= 0.719, Testing Acc= 0.745614, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 260, Reward=-24.148767, Minibatch Loss= 0.8014, Training Accuracy= 0.836, Testing Acc= 0.780702, Max Final Accuracy=  0.815789, Max AUC=  0.849338, Max AP=  0.886798\n",
      "Step 520, Reward=-21.247568, Minibatch Loss= 0.7099, Training Accuracy= 0.828, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 270, Reward=-30.970463, Minibatch Loss= 0.6857, Training Accuracy= 0.828, Testing Acc= 0.798246, Max Final Accuracy=  0.815789, Max AUC=  0.849338, Max AP=  0.886798\n",
      "Step 530, Reward=-29.26976, Minibatch Loss= 0.6296, Training Accuracy= 0.930, Testing Acc= 0.473684, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 280, Reward=-24.647894, Minibatch Loss= 0.6146, Training Accuracy= 0.898, Testing Acc= 0.833333, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 540, Reward=-18.588282, Minibatch Loss= 0.7007, Training Accuracy= 0.836, Testing Acc= 0.675439, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 290, Reward=-21.035196, Minibatch Loss= 0.5694, Training Accuracy= 0.898, Testing Acc= 0.657895, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 550, Reward=-22.437286, Minibatch Loss= 0.6041, Training Accuracy= 0.852, Testing Acc= 0.473684, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 300, Reward=-17.720993, Minibatch Loss= 0.5897, Training Accuracy= 0.859, Testing Acc= 0.736842, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 560, Reward=-24.246191, Minibatch Loss= 0.7395, Training Accuracy= 0.805, Testing Acc= 0.491228, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 310, Reward=-25.696745, Minibatch Loss= 0.7363, Training Accuracy= 0.797, Testing Acc= 0.780702, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 570, Reward=-20.715189, Minibatch Loss= 0.7263, Training Accuracy= 0.836, Testing Acc= 0.719298, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 320, Reward=-26.52643, Minibatch Loss= 0.7602, Training Accuracy= 0.766, Testing Acc= 0.605263, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 580, Reward=-18.6547, Minibatch Loss= 0.8271, Training Accuracy= 0.750, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 330, Reward=-31.113289, Minibatch Loss= 0.6055, Training Accuracy= 0.875, Testing Acc= 0.684211, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 590, Reward=-21.379333, Minibatch Loss= 0.6673, Training Accuracy= 0.805, Testing Acc= 0.710526, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 600, Reward=-22.248657, Minibatch Loss= 0.7338, Training Accuracy= 0.828, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 340, Reward=-28.908543, Minibatch Loss= 0.6742, Training Accuracy= 0.836, Testing Acc= 0.649123, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 610, Reward=-25.34931, Minibatch Loss= 0.5768, Training Accuracy= 0.859, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 350, Reward=-24.021622, Minibatch Loss= 0.5779, Training Accuracy= 0.852, Testing Acc= 0.517544, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 620, Reward=-28.107365, Minibatch Loss= 0.6314, Training Accuracy= 0.852, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 360, Reward=-19.167782, Minibatch Loss= 0.7029, Training Accuracy= 0.805, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 630, Reward=-28.39454, Minibatch Loss= 0.6370, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 370, Reward=-25.841454, Minibatch Loss= 0.6031, Training Accuracy= 0.875, Testing Acc= 0.570175, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 640, Reward=-19.708729, Minibatch Loss= 0.5987, Training Accuracy= 0.891, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 380, Reward=-24.467358, Minibatch Loss= 0.5842, Training Accuracy= 0.875, Testing Acc= 0.605263, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 650, Reward=-23.310026, Minibatch Loss= 0.5806, Training Accuracy= 0.875, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 390, Reward=-18.572552, Minibatch Loss= 0.5504, Training Accuracy= 0.906, Testing Acc= 0.570175, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 400, Reward=-17.850601, Minibatch Loss= 0.5671, Training Accuracy= 0.898, Testing Acc= 0.535088, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 660, Reward=-19.694859, Minibatch Loss= 0.5501, Training Accuracy= 0.875, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 670, Reward=-24.297106, Minibatch Loss= 0.5670, Training Accuracy= 0.875, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 410, Reward=-28.433784, Minibatch Loss= 0.6400, Training Accuracy= 0.852, Testing Acc= 0.535088, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 680, Reward=-25.809902, Minibatch Loss= 0.6789, Training Accuracy= 0.898, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 420, Reward=-35.441635, Minibatch Loss= 0.7012, Training Accuracy= 0.836, Testing Acc= 0.526316, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 690, Reward=-30.30198, Minibatch Loss= 0.7209, Training Accuracy= 0.773, Testing Acc= 0.666667, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 430, Reward=-27.288546, Minibatch Loss= 0.6559, Training Accuracy= 0.836, Testing Acc= 0.596491, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 700, Reward=-24.632349, Minibatch Loss= 0.6217, Training Accuracy= 0.891, Testing Acc= 0.666667, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 440, Reward=-21.007034, Minibatch Loss= 0.6576, Training Accuracy= 0.844, Testing Acc= 0.789474, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 710, Reward=-15.576511, Minibatch Loss= 0.7004, Training Accuracy= 0.859, Testing Acc= 0.763158, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 450, Reward=-27.69241, Minibatch Loss= 0.6393, Training Accuracy= 0.820, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 720, Reward=-12.546162, Minibatch Loss= 0.5465, Training Accuracy= 0.883, Testing Acc= 0.780702, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 460, Reward=-22.828226, Minibatch Loss= 0.5548, Training Accuracy= 0.867, Testing Acc= 0.614035, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 730, Reward=-21.745632, Minibatch Loss= 0.7041, Training Accuracy= 0.781, Testing Acc= 0.692982, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 470, Reward=-26.660679, Minibatch Loss= 0.6123, Training Accuracy= 0.867, Testing Acc= 0.728070, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 740, Reward=-29.32461, Minibatch Loss= 0.6854, Training Accuracy= 0.828, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 480, Reward=-15.824599, Minibatch Loss= 0.6062, Training Accuracy= 0.844, Testing Acc= 0.719298, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 750, Reward=-28.12918, Minibatch Loss= 0.7784, Training Accuracy= 0.742, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 490, Reward=-20.235537, Minibatch Loss= 0.5154, Training Accuracy= 0.891, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 760, Reward=-19.155977, Minibatch Loss= 0.6639, Training Accuracy= 0.875, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 500, Reward=-18.769619, Minibatch Loss= 0.6611, Training Accuracy= 0.844, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 770, Reward=-26.147354, Minibatch Loss= 0.7757, Training Accuracy= 0.805, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 510, Reward=-32.086205, Minibatch Loss= 0.7509, Training Accuracy= 0.734, Testing Acc= 0.535088, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 780, Reward=-17.12787, Minibatch Loss= 0.6470, Training Accuracy= 0.836, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 520, Reward=-26.04346, Minibatch Loss= 0.8351, Training Accuracy= 0.781, Testing Acc= 0.684211, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 790, Reward=-24.477907, Minibatch Loss= 0.7131, Training Accuracy= 0.812, Testing Acc= 0.780702, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 530, Reward=-27.626184, Minibatch Loss= 0.6225, Training Accuracy= 0.914, Testing Acc= 0.692982, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 800, Reward=-21.941496, Minibatch Loss= 0.5703, Training Accuracy= 0.898, Testing Acc= 0.745614, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 540, Reward=-22.140331, Minibatch Loss= 0.5931, Training Accuracy= 0.875, Testing Acc= 0.614035, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 810, Reward=-18.71106, Minibatch Loss= 0.6669, Training Accuracy= 0.828, Testing Acc= 0.789474, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 550, Reward=-20.105965, Minibatch Loss= 0.6028, Training Accuracy= 0.844, Testing Acc= 0.570175, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 820, Reward=-27.94949, Minibatch Loss= 0.5178, Training Accuracy= 0.906, Testing Acc= 0.745614, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 560, Reward=-15.878762, Minibatch Loss= 0.5574, Training Accuracy= 0.883, Testing Acc= 0.570175, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 830, Reward=-20.896677, Minibatch Loss= 0.9452, Training Accuracy= 0.602, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 570, Reward=-15.378238, Minibatch Loss= 0.4848, Training Accuracy= 0.930, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 840, Reward=-29.103844, Minibatch Loss= 0.9332, Training Accuracy= 0.742, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 580, Reward=-14.821705, Minibatch Loss= 0.5295, Training Accuracy= 0.891, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 850, Reward=-29.80279, Minibatch Loss= 0.6874, Training Accuracy= 0.906, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 590, Reward=-20.120888, Minibatch Loss= 0.4943, Training Accuracy= 0.945, Testing Acc= 0.789474, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 860, Reward=-20.399956, Minibatch Loss= 1.0231, Training Accuracy= 0.766, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 600, Reward=-12.720554, Minibatch Loss= 0.5788, Training Accuracy= 0.914, Testing Acc= 0.596491, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 870, Reward=-24.871613, Minibatch Loss= 0.6635, Training Accuracy= 0.875, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 610, Reward=-21.46793, Minibatch Loss= 0.5268, Training Accuracy= 0.906, Testing Acc= 0.798246, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 880, Reward=-29.874084, Minibatch Loss= 0.7293, Training Accuracy= 0.836, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 620, Reward=-21.262787, Minibatch Loss= 0.5442, Training Accuracy= 0.867, Testing Acc= 0.789474, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 890, Reward=-24.706963, Minibatch Loss= 0.6784, Training Accuracy= 0.867, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 630, Reward=-14.950913, Minibatch Loss= 0.6159, Training Accuracy= 0.812, Testing Acc= 0.833333, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 900, Reward=-30.57527, Minibatch Loss= 0.6605, Training Accuracy= 0.836, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 640, Reward=-31.645939, Minibatch Loss= 0.5882, Training Accuracy= 0.836, Testing Acc= 0.780702, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 910, Reward=-28.491598, Minibatch Loss= 0.5718, Training Accuracy= 0.914, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 650, Reward=-24.050987, Minibatch Loss= 0.5605, Training Accuracy= 0.883, Testing Acc= 0.789474, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 920, Reward=-13.227243, Minibatch Loss= 0.5111, Training Accuracy= 0.922, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 660, Reward=-22.075798, Minibatch Loss= 0.6279, Training Accuracy= 0.789, Testing Acc= 0.666667, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 930, Reward=-18.746914, Minibatch Loss= 0.5639, Training Accuracy= 0.875, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 670, Reward=-22.382332, Minibatch Loss= 0.5544, Training Accuracy= 0.859, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 940, Reward=-27.40174, Minibatch Loss= 0.8262, Training Accuracy= 0.812, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 680, Reward=-23.70063, Minibatch Loss= 0.5289, Training Accuracy= 0.875, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 950, Reward=-25.448223, Minibatch Loss= 1.0440, Training Accuracy= 0.812, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 690, Reward=-35.593258, Minibatch Loss= 0.5822, Training Accuracy= 0.867, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 960, Reward=-31.948975, Minibatch Loss= 0.8303, Training Accuracy= 0.828, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 700, Reward=-16.860857, Minibatch Loss= 0.8791, Training Accuracy= 0.773, Testing Acc= 0.798246, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 970, Reward=-23.616982, Minibatch Loss= 0.9270, Training Accuracy= 0.766, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 710, Reward=-15.879393, Minibatch Loss= 0.7197, Training Accuracy= 0.805, Testing Acc= 0.631579, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 980, Reward=-33.855824, Minibatch Loss= 0.7372, Training Accuracy= 0.836, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 720, Reward=-19.895605, Minibatch Loss= 0.7286, Training Accuracy= 0.852, Testing Acc= 0.631579, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 990, Reward=-24.576073, Minibatch Loss= 0.7576, Training Accuracy= 0.859, Testing Acc= 0.692982, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 730, Reward=-26.043613, Minibatch Loss= 0.5878, Training Accuracy= 0.898, Testing Acc= 0.535088, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1000, Reward=-22.490932, Minibatch Loss= 0.7294, Training Accuracy= 0.812, Testing Acc= 0.780702, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 740, Reward=-23.020868, Minibatch Loss= 0.5708, Training Accuracy= 0.859, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1010, Reward=-26.71202, Minibatch Loss= 0.7234, Training Accuracy= 0.805, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 750, Reward=-8.694982, Minibatch Loss= 0.5105, Training Accuracy= 0.883, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1020, Reward=-18.264164, Minibatch Loss= 0.7826, Training Accuracy= 0.789, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 760, Reward=-21.714283, Minibatch Loss= 0.4807, Training Accuracy= 0.875, Testing Acc= 0.535088, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1030, Reward=-30.600304, Minibatch Loss= 0.6074, Training Accuracy= 0.906, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 770, Reward=-18.41939, Minibatch Loss= 0.4719, Training Accuracy= 0.898, Testing Acc= 0.526316, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1040, Reward=-27.497368, Minibatch Loss= 0.7245, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1050, Reward=-23.258968, Minibatch Loss= 0.7104, Training Accuracy= 0.852, Testing Acc= 0.640351, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 780, Reward=-15.679692, Minibatch Loss= 0.4963, Training Accuracy= 0.883, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1060, Reward=-28.800566, Minibatch Loss= 0.6472, Training Accuracy= 0.844, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 790, Reward=-21.957933, Minibatch Loss= 0.6518, Training Accuracy= 0.836, Testing Acc= 0.543860, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1070, Reward=-22.220934, Minibatch Loss= 0.6057, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 800, Reward=-32.502213, Minibatch Loss= 0.6693, Training Accuracy= 0.867, Testing Acc= 0.605263, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1080, Reward=-23.099255, Minibatch Loss= 0.5873, Training Accuracy= 0.844, Testing Acc= 0.675439, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 810, Reward=-15.088331, Minibatch Loss= 0.4699, Training Accuracy= 0.938, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1090, Reward=-23.540829, Minibatch Loss= 0.6794, Training Accuracy= 0.812, Testing Acc= 0.771930, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 820, Reward=-15.985937, Minibatch Loss= 0.4406, Training Accuracy= 0.906, Testing Acc= 0.605263, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1100, Reward=-25.182102, Minibatch Loss= 0.6065, Training Accuracy= 0.867, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 830, Reward=-19.356924, Minibatch Loss= 0.6434, Training Accuracy= 0.789, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1110, Reward=-30.071802, Minibatch Loss= 0.6218, Training Accuracy= 0.883, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 840, Reward=-28.298763, Minibatch Loss= 0.8226, Training Accuracy= 0.805, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1120, Reward=-17.477753, Minibatch Loss= 0.6696, Training Accuracy= 0.852, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 850, Reward=-22.178751, Minibatch Loss= 0.6554, Training Accuracy= 0.906, Testing Acc= 0.526316, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1130, Reward=-20.73938, Minibatch Loss= 0.6589, Training Accuracy= 0.836, Testing Acc= 0.675439, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 860, Reward=-11.671335, Minibatch Loss= 0.5879, Training Accuracy= 0.875, Testing Acc= 0.596491, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1140, Reward=-22.71706, Minibatch Loss= 0.8391, Training Accuracy= 0.875, Testing Acc= 0.675439, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 870, Reward=-8.224088, Minibatch Loss= 0.5259, Training Accuracy= 0.891, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1150, Reward=-33.68425, Minibatch Loss= 0.7869, Training Accuracy= 0.766, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 880, Reward=-15.965096, Minibatch Loss= 0.4856, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1160, Reward=-34.18166, Minibatch Loss= 0.9392, Training Accuracy= 0.805, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 890, Reward=-24.040564, Minibatch Loss= 0.4419, Training Accuracy= 0.883, Testing Acc= 0.543860, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1170, Reward=-32.18603, Minibatch Loss= 0.8816, Training Accuracy= 0.828, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 900, Reward=-19.756845, Minibatch Loss= 0.5101, Training Accuracy= 0.891, Testing Acc= 0.517544, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1180, Reward=-24.039028, Minibatch Loss= 0.9732, Training Accuracy= 0.820, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 910, Reward=-27.747623, Minibatch Loss= 0.5884, Training Accuracy= 0.906, Testing Acc= 0.763158, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1190, Reward=-28.769812, Minibatch Loss= 0.9381, Training Accuracy= 0.750, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 920, Reward=-23.934462, Minibatch Loss= 0.6577, Training Accuracy= 0.828, Testing Acc= 0.763158, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1200, Reward=-22.259802, Minibatch Loss= 0.8072, Training Accuracy= 0.844, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 930, Reward=-20.830105, Minibatch Loss= 0.6283, Training Accuracy= 0.891, Testing Acc= 0.614035, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1210, Reward=-22.155867, Minibatch Loss= 0.7820, Training Accuracy= 0.836, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 940, Reward=-15.352586, Minibatch Loss= 0.6516, Training Accuracy= 0.828, Testing Acc= 0.745614, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1220, Reward=-29.321081, Minibatch Loss= 0.8748, Training Accuracy= 0.805, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 950, Reward=-23.672333, Minibatch Loss= 0.5451, Training Accuracy= 0.922, Testing Acc= 0.657895, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1230, Reward=-31.736729, Minibatch Loss= 1.0659, Training Accuracy= 0.664, Testing Acc= 0.745614, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 960, Reward=-23.98431, Minibatch Loss= 0.7182, Training Accuracy= 0.836, Testing Acc= 0.666667, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1240, Reward=-38.84818, Minibatch Loss= 0.8018, Training Accuracy= 0.859, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 970, Reward=-31.991035, Minibatch Loss= 0.7243, Training Accuracy= 0.844, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1250, Reward=-21.490437, Minibatch Loss= 0.8044, Training Accuracy= 0.844, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1260, Reward=-24.033148, Minibatch Loss= 0.6970, Training Accuracy= 0.852, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 980, Reward=-14.303739, Minibatch Loss= 0.5911, Training Accuracy= 0.852, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1270, Reward=-25.186098, Minibatch Loss= 0.6659, Training Accuracy= 0.852, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 990, Reward=-20.027308, Minibatch Loss= 0.5478, Training Accuracy= 0.914, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1280, Reward=-25.03131, Minibatch Loss= 0.6241, Training Accuracy= 0.859, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1000, Reward=-22.972715, Minibatch Loss= 0.8297, Training Accuracy= 0.859, Testing Acc= 0.736842, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1290, Reward=-21.887398, Minibatch Loss= 0.6581, Training Accuracy= 0.836, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1010, Reward=-24.773819, Minibatch Loss= 0.5913, Training Accuracy= 0.906, Testing Acc= 0.649123, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1300, Reward=-25.917206, Minibatch Loss= 0.7337, Training Accuracy= 0.867, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1020, Reward=-23.326279, Minibatch Loss= 0.5709, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1310, Reward=-30.07399, Minibatch Loss= 0.8471, Training Accuracy= 0.797, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1030, Reward=-14.633673, Minibatch Loss= 0.4938, Training Accuracy= 0.898, Testing Acc= 0.526316, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1320, Reward=-26.856808, Minibatch Loss= 0.8613, Training Accuracy= 0.695, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1040, Reward=-19.72909, Minibatch Loss= 0.5802, Training Accuracy= 0.812, Testing Acc= 0.649123, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1330, Reward=-27.931593, Minibatch Loss= 0.7021, Training Accuracy= 0.883, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1050, Reward=-21.341686, Minibatch Loss= 0.5622, Training Accuracy= 0.875, Testing Acc= 0.622807, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1340, Reward=-19.840652, Minibatch Loss= 0.7777, Training Accuracy= 0.836, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1060, Reward=-24.883877, Minibatch Loss= 0.6284, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1350, Reward=-29.085432, Minibatch Loss= 0.5934, Training Accuracy= 0.922, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1070, Reward=-13.380347, Minibatch Loss= 0.5528, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1360, Reward=-27.713865, Minibatch Loss= 0.7002, Training Accuracy= 0.891, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1080, Reward=-12.188248, Minibatch Loss= 0.5536, Training Accuracy= 0.898, Testing Acc= 0.570175, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1370, Reward=-22.955751, Minibatch Loss= 0.6626, Training Accuracy= 0.875, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1090, Reward=-22.071877, Minibatch Loss= 0.5299, Training Accuracy= 0.898, Testing Acc= 0.543860, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1380, Reward=-28.2907, Minibatch Loss= 0.7197, Training Accuracy= 0.852, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1100, Reward=-19.586313, Minibatch Loss= 0.5974, Training Accuracy= 0.812, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1390, Reward=-16.72343, Minibatch Loss= 0.7053, Training Accuracy= 0.797, Testing Acc= 0.666667, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1110, Reward=-18.983858, Minibatch Loss= 0.5135, Training Accuracy= 0.891, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1400, Reward=-21.67752, Minibatch Loss= 0.5783, Training Accuracy= 0.852, Testing Acc= 0.692982, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1120, Reward=-6.683394, Minibatch Loss= 0.5872, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1410, Reward=-20.212929, Minibatch Loss= 0.5244, Training Accuracy= 0.883, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1130, Reward=-18.35205, Minibatch Loss= 0.4932, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1420, Reward=-24.252853, Minibatch Loss= 0.6178, Training Accuracy= 0.828, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1140, Reward=-16.82319, Minibatch Loss= 0.6550, Training Accuracy= 0.836, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1430, Reward=-19.329437, Minibatch Loss= 0.6058, Training Accuracy= 0.891, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1440, Reward=-17.842361, Minibatch Loss= 0.5677, Training Accuracy= 0.906, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1150, Reward=-22.208763, Minibatch Loss= 0.6306, Training Accuracy= 0.844, Testing Acc= 0.719298, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1450, Reward=-16.602526, Minibatch Loss= 0.6798, Training Accuracy= 0.836, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1160, Reward=-21.932663, Minibatch Loss= 0.7042, Training Accuracy= 0.797, Testing Acc= 0.719298, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1460, Reward=-19.022392, Minibatch Loss= 0.6419, Training Accuracy= 0.844, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1170, Reward=-27.717869, Minibatch Loss= 0.6096, Training Accuracy= 0.875, Testing Acc= 0.596491, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1470, Reward=-10.326257, Minibatch Loss= 0.6536, Training Accuracy= 0.844, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1180, Reward=-17.277737, Minibatch Loss= 0.5640, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1480, Reward=-18.87935, Minibatch Loss= 0.5638, Training Accuracy= 0.836, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1190, Reward=-23.00355, Minibatch Loss= 0.5856, Training Accuracy= 0.844, Testing Acc= 0.517544, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1490, Reward=-15.033818, Minibatch Loss= 0.6255, Training Accuracy= 0.820, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1200, Reward=-15.748356, Minibatch Loss= 0.5154, Training Accuracy= 0.945, Testing Acc= 0.692982, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1500, Reward=-24.842361, Minibatch Loss= 0.7711, Training Accuracy= 0.703, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1210, Reward=-23.605093, Minibatch Loss= 0.8043, Training Accuracy= 0.875, Testing Acc= 0.754386, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1510, Reward=-29.662708, Minibatch Loss= 0.6625, Training Accuracy= 0.836, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1220, Reward=-20.641493, Minibatch Loss= 0.6876, Training Accuracy= 0.875, Testing Acc= 0.570175, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1520, Reward=-16.458477, Minibatch Loss= 0.5473, Training Accuracy= 0.922, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1230, Reward=-24.696295, Minibatch Loss= 0.6124, Training Accuracy= 0.914, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1530, Reward=-17.792166, Minibatch Loss= 0.7351, Training Accuracy= 0.820, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1240, Reward=-11.399165, Minibatch Loss= 0.6087, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1540, Reward=-30.556763, Minibatch Loss= 0.6109, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1250, Reward=-17.993887, Minibatch Loss= 0.8735, Training Accuracy= 0.789, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1550, Reward=-10.795573, Minibatch Loss= 0.5305, Training Accuracy= 0.891, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1260, Reward=-25.87732, Minibatch Loss= 0.7794, Training Accuracy= 0.820, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1560, Reward=-15.275518, Minibatch Loss= 0.4429, Training Accuracy= 0.922, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1270, Reward=-29.340342, Minibatch Loss= 0.6196, Training Accuracy= 0.922, Testing Acc= 0.561404, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1570, Reward=-20.397118, Minibatch Loss= 0.4334, Training Accuracy= 0.891, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1280, Reward=-21.014818, Minibatch Loss= 0.6312, Training Accuracy= 0.867, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1580, Reward=-15.998482, Minibatch Loss= 0.7059, Training Accuracy= 0.781, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1290, Reward=-8.282663, Minibatch Loss= 0.4724, Training Accuracy= 0.930, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1590, Reward=-17.331448, Minibatch Loss= 0.4979, Training Accuracy= 0.852, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1300, Reward=-23.76331, Minibatch Loss= 0.4998, Training Accuracy= 0.930, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1600, Reward=-10.625403, Minibatch Loss= 0.5118, Training Accuracy= 0.883, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1310, Reward=-13.17753, Minibatch Loss= 0.5114, Training Accuracy= 0.898, Testing Acc= 0.561404, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1610, Reward=-22.440048, Minibatch Loss= 0.5692, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1320, Reward=-18.236609, Minibatch Loss= 0.6268, Training Accuracy= 0.852, Testing Acc= 0.561404, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1620, Reward=-28.653816, Minibatch Loss= 0.7450, Training Accuracy= 0.773, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1330, Reward=-19.916262, Minibatch Loss= 0.6317, Training Accuracy= 0.891, Testing Acc= 0.517544, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1630, Reward=-22.107788, Minibatch Loss= 0.6127, Training Accuracy= 0.852, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1340, Reward=-33.449314, Minibatch Loss= 0.6582, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1640, Reward=-18.031183, Minibatch Loss= 0.7291, Training Accuracy= 0.828, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1350, Reward=-18.349485, Minibatch Loss= 0.5960, Training Accuracy= 0.906, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1650, Reward=-23.855938, Minibatch Loss= 0.6065, Training Accuracy= 0.898, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1360, Reward=-22.708408, Minibatch Loss= 0.5640, Training Accuracy= 0.891, Testing Acc= 0.570175, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1660, Reward=-24.735092, Minibatch Loss= 0.7064, Training Accuracy= 0.758, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1370, Reward=-27.181623, Minibatch Loss= 0.5690, Training Accuracy= 0.852, Testing Acc= 0.543860, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1670, Reward=-25.256207, Minibatch Loss= 0.6918, Training Accuracy= 0.828, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1380, Reward=-6.3016996, Minibatch Loss= 0.4802, Training Accuracy= 0.891, Testing Acc= 0.561404, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1680, Reward=-19.277205, Minibatch Loss= 0.5765, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1390, Reward=-16.661613, Minibatch Loss= 0.8022, Training Accuracy= 0.883, Testing Acc= 0.526316, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1690, Reward=-24.122213, Minibatch Loss= 1.0279, Training Accuracy= 0.633, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1400, Reward=-16.312603, Minibatch Loss= 0.5783, Training Accuracy= 0.820, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1700, Reward=-32.016167, Minibatch Loss= 0.6809, Training Accuracy= 0.844, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1410, Reward=-23.463146, Minibatch Loss= 0.4953, Training Accuracy= 0.891, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1710, Reward=-24.810553, Minibatch Loss= 0.6158, Training Accuracy= 0.914, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1420, Reward=-23.803806, Minibatch Loss= 0.4890, Training Accuracy= 0.922, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1720, Reward=-16.98854, Minibatch Loss= 0.5450, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1430, Reward=-27.125654, Minibatch Loss= 0.5782, Training Accuracy= 0.883, Testing Acc= 0.517544, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1730, Reward=-21.419077, Minibatch Loss= 0.5655, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1440, Reward=-21.09862, Minibatch Loss= 0.5667, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1740, Reward=-32.835804, Minibatch Loss= 0.7286, Training Accuracy= 0.844, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1450, Reward=-19.247648, Minibatch Loss= 0.5986, Training Accuracy= 0.828, Testing Acc= 0.535088, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1750, Reward=-27.699356, Minibatch Loss= 0.9087, Training Accuracy= 0.836, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1460, Reward=-17.137764, Minibatch Loss= 0.5962, Training Accuracy= 0.930, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1760, Reward=-17.837349, Minibatch Loss= 0.8591, Training Accuracy= 0.828, Testing Acc= 0.535088, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1470, Reward=-14.773518, Minibatch Loss= 0.6302, Training Accuracy= 0.836, Testing Acc= 0.605263, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1770, Reward=-22.96223, Minibatch Loss= 0.6215, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1480, Reward=-18.757692, Minibatch Loss= 0.4887, Training Accuracy= 0.930, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1780, Reward=-12.8634405, Minibatch Loss= 0.5607, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1490, Reward=-17.64193, Minibatch Loss= 0.5454, Training Accuracy= 0.852, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1790, Reward=-21.737135, Minibatch Loss= 0.5755, Training Accuracy= 0.875, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1500, Reward=-13.937883, Minibatch Loss= 0.7918, Training Accuracy= 0.750, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1800, Reward=-18.726133, Minibatch Loss= 0.6057, Training Accuracy= 0.922, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1510, Reward=-24.309263, Minibatch Loss= 0.7603, Training Accuracy= 0.750, Testing Acc= 0.763158, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1810, Reward=-18.062803, Minibatch Loss= 0.5988, Training Accuracy= 0.891, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1520, Reward=-32.85152, Minibatch Loss= 0.5667, Training Accuracy= 0.859, Testing Acc= 0.745614, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1820, Reward=-17.275349, Minibatch Loss= 0.5325, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1530, Reward=-21.93056, Minibatch Loss= 0.6269, Training Accuracy= 0.875, Testing Acc= 0.728070, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1830, Reward=-9.734723, Minibatch Loss= 0.6251, Training Accuracy= 0.898, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1540, Reward=-23.65343, Minibatch Loss= 0.6242, Training Accuracy= 0.820, Testing Acc= 0.649123, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1840, Reward=-18.649282, Minibatch Loss= 0.5481, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1550, Reward=-18.3107, Minibatch Loss= 0.9805, Training Accuracy= 0.773, Testing Acc= 0.561404, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1850, Reward=-12.107668, Minibatch Loss= 0.4280, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1560, Reward=-20.509544, Minibatch Loss= 0.6261, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1860, Reward=-18.301683, Minibatch Loss= 0.4739, Training Accuracy= 0.875, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1570, Reward=-21.761059, Minibatch Loss= 0.5941, Training Accuracy= 0.844, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1870, Reward=-16.727194, Minibatch Loss= 0.4435, Training Accuracy= 0.867, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1580, Reward=-18.092358, Minibatch Loss= 0.5132, Training Accuracy= 0.867, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1880, Reward=-11.578217, Minibatch Loss= 0.4385, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1590, Reward=-18.635681, Minibatch Loss= 0.4567, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1890, Reward=-7.4866123, Minibatch Loss= 0.4711, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1600, Reward=-13.125091, Minibatch Loss= 0.4851, Training Accuracy= 0.891, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1900, Reward=-12.023349, Minibatch Loss= 0.4951, Training Accuracy= 0.867, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1610, Reward=-22.48618, Minibatch Loss= 0.4951, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1910, Reward=-12.588909, Minibatch Loss= 0.3503, Training Accuracy= 0.930, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1620, Reward=-16.256245, Minibatch Loss= 0.4656, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1920, Reward=-18.089478, Minibatch Loss= 0.4351, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1630, Reward=-12.309977, Minibatch Loss= 0.3872, Training Accuracy= 0.922, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1930, Reward=-14.816189, Minibatch Loss= 0.7321, Training Accuracy= 0.781, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1640, Reward=-19.212694, Minibatch Loss= 0.4306, Training Accuracy= 0.883, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1940, Reward=-24.121214, Minibatch Loss= 0.7379, Training Accuracy= 0.781, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1650, Reward=-12.462009, Minibatch Loss= 0.4116, Training Accuracy= 0.938, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1950, Reward=-10.602081, Minibatch Loss= 0.7510, Training Accuracy= 0.828, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1960, Reward=-22.06119, Minibatch Loss= 0.7897, Training Accuracy= 0.828, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1660, Reward=-12.042823, Minibatch Loss= 0.6385, Training Accuracy= 0.820, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1670, Reward=-15.559195, Minibatch Loss= 0.5474, Training Accuracy= 0.875, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1970, Reward=-29.49834, Minibatch Loss= 0.8452, Training Accuracy= 0.820, Testing Acc= 0.666667, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1980, Reward=-25.075512, Minibatch Loss= 1.0128, Training Accuracy= 0.719, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1680, Reward=-13.050086, Minibatch Loss= 0.4744, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1990, Reward=-33.263107, Minibatch Loss= 0.7823, Training Accuracy= 0.828, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1690, Reward=-7.020702, Minibatch Loss= 0.5801, Training Accuracy= 0.789, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2000, Reward=-27.80272, Minibatch Loss= 0.8360, Training Accuracy= 0.805, Testing Acc= 0.666667, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1700, Reward=-27.4494, Minibatch Loss= 0.6384, Training Accuracy= 0.852, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2010, Reward=-22.440542, Minibatch Loss= 0.7736, Training Accuracy= 0.820, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1710, Reward=-20.924522, Minibatch Loss= 0.6281, Training Accuracy= 0.875, Testing Acc= 0.561404, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2020, Reward=-30.674908, Minibatch Loss= 0.8823, Training Accuracy= 0.766, Testing Acc= 0.763158, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1720, Reward=-15.983968, Minibatch Loss= 0.5206, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2030, Reward=-26.406437, Minibatch Loss= 0.7891, Training Accuracy= 0.836, Testing Acc= 0.745614, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1730, Reward=-10.287702, Minibatch Loss= 0.5428, Training Accuracy= 0.898, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 1740, Reward=-20.481256, Minibatch Loss= 0.5021, Training Accuracy= 0.891, Testing Acc= 0.605263, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2040, Reward=-25.191813, Minibatch Loss= 0.6396, Training Accuracy= 0.852, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2050, Reward=-22.150463, Minibatch Loss= 0.6438, Training Accuracy= 0.867, Testing Acc= 0.719298, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1750, Reward=-8.849292, Minibatch Loss= 0.5032, Training Accuracy= 0.859, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2060, Reward=-28.829739, Minibatch Loss= 0.6144, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1760, Reward=-9.432855, Minibatch Loss= 0.3975, Training Accuracy= 0.930, Testing Acc= 0.535088, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2070, Reward=-33.75055, Minibatch Loss= 0.8060, Training Accuracy= 0.820, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1770, Reward=-12.285128, Minibatch Loss= 0.4170, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2080, Reward=-31.893665, Minibatch Loss= 0.8224, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1780, Reward=-12.767043, Minibatch Loss= 0.4610, Training Accuracy= 0.867, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2090, Reward=-20.204813, Minibatch Loss= 0.7296, Training Accuracy= 0.859, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1790, Reward=-14.98459, Minibatch Loss= 0.4774, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2100, Reward=-26.499592, Minibatch Loss= 0.7162, Training Accuracy= 0.812, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1800, Reward=-8.867739, Minibatch Loss= 0.3976, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2110, Reward=-20.971952, Minibatch Loss= 0.7184, Training Accuracy= 0.812, Testing Acc= 0.719298, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1810, Reward=-14.418836, Minibatch Loss= 0.4089, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2120, Reward=-18.144497, Minibatch Loss= 0.7400, Training Accuracy= 0.820, Testing Acc= 0.745614, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1820, Reward=-17.126186, Minibatch Loss= 0.5045, Training Accuracy= 0.852, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2130, Reward=-22.64327, Minibatch Loss= 0.8312, Training Accuracy= 0.844, Testing Acc= 0.763158, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1830, Reward=-11.440855, Minibatch Loss= 0.3688, Training Accuracy= 0.938, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2140, Reward=-33.331696, Minibatch Loss= 0.6458, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1840, Reward=-17.746027, Minibatch Loss= 0.4244, Training Accuracy= 0.914, Testing Acc= 0.561404, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2150, Reward=-18.379429, Minibatch Loss= 0.5760, Training Accuracy= 0.914, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1850, Reward=-8.035946, Minibatch Loss= 0.4321, Training Accuracy= 0.922, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2160, Reward=-19.152576, Minibatch Loss= 0.4602, Training Accuracy= 0.930, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1860, Reward=-18.953072, Minibatch Loss= 0.4213, Training Accuracy= 0.930, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2170, Reward=-17.826248, Minibatch Loss= 0.4961, Training Accuracy= 0.867, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1870, Reward=-10.921539, Minibatch Loss= 0.4146, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2180, Reward=-18.947992, Minibatch Loss= 0.7230, Training Accuracy= 0.844, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1880, Reward=-13.493902, Minibatch Loss= 0.3777, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2190, Reward=-34.465004, Minibatch Loss= 0.9763, Training Accuracy= 0.773, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2200, Reward=-25.125332, Minibatch Loss= 0.8090, Training Accuracy= 0.867, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1890, Reward=-17.185522, Minibatch Loss= 0.4457, Training Accuracy= 0.914, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2210, Reward=-17.994505, Minibatch Loss= 0.8451, Training Accuracy= 0.797, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1900, Reward=-9.324991, Minibatch Loss= 0.3476, Training Accuracy= 0.938, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2220, Reward=-28.655754, Minibatch Loss= 0.6911, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1910, Reward=-23.951834, Minibatch Loss= 0.3884, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2230, Reward=-20.034828, Minibatch Loss= 0.7631, Training Accuracy= 0.867, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1920, Reward=-12.285025, Minibatch Loss= 0.4134, Training Accuracy= 0.922, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2240, Reward=-25.380875, Minibatch Loss= 0.9264, Training Accuracy= 0.734, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1930, Reward=-11.438733, Minibatch Loss= 0.4180, Training Accuracy= 0.922, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2250, Reward=-31.347683, Minibatch Loss= 0.8752, Training Accuracy= 0.789, Testing Acc= 0.675439, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1940, Reward=-11.49402, Minibatch Loss= 0.5006, Training Accuracy= 0.852, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2260, Reward=-25.704533, Minibatch Loss= 0.9387, Training Accuracy= 0.734, Testing Acc= 0.666667, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1950, Reward=-17.492493, Minibatch Loss= 0.4239, Training Accuracy= 0.914, Testing Acc= 0.517544, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2270, Reward=-28.510592, Minibatch Loss= 0.9074, Training Accuracy= 0.805, Testing Acc= 0.684211, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1960, Reward=-21.02269, Minibatch Loss= 0.6157, Training Accuracy= 0.812, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2280, Reward=-25.115774, Minibatch Loss= 0.6809, Training Accuracy= 0.906, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1970, Reward=-28.44814, Minibatch Loss= 0.6013, Training Accuracy= 0.898, Testing Acc= 0.596491, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2290, Reward=-16.844099, Minibatch Loss= 0.7882, Training Accuracy= 0.844, Testing Acc= 0.675439, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1980, Reward=-25.680412, Minibatch Loss= 0.5134, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2300, Reward=-22.715473, Minibatch Loss= 0.6600, Training Accuracy= 0.945, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 1990, Reward=-17.909052, Minibatch Loss= 0.4634, Training Accuracy= 0.906, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2310, Reward=-16.325996, Minibatch Loss= 0.9173, Training Accuracy= 0.797, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2000, Reward=-15.052186, Minibatch Loss= 0.5728, Training Accuracy= 0.836, Testing Acc= 0.561404, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2320, Reward=-26.027386, Minibatch Loss= 0.7842, Training Accuracy= 0.859, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2010, Reward=-9.817618, Minibatch Loss= 0.3607, Training Accuracy= 0.930, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2330, Reward=-20.464077, Minibatch Loss= 0.8150, Training Accuracy= 0.812, Testing Acc= 0.745614, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2020, Reward=-10.314875, Minibatch Loss= 0.5918, Training Accuracy= 0.781, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2340, Reward=-19.6666, Minibatch Loss= 0.6933, Training Accuracy= 0.859, Testing Acc= 0.754386, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2030, Reward=-15.187254, Minibatch Loss= 0.3363, Training Accuracy= 0.945, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2350, Reward=-22.964828, Minibatch Loss= 0.6796, Training Accuracy= 0.883, Testing Acc= 0.622807, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2040, Reward=-19.843567, Minibatch Loss= 0.5410, Training Accuracy= 0.844, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2360, Reward=-21.851562, Minibatch Loss= 0.6757, Training Accuracy= 0.820, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2050, Reward=-23.406574, Minibatch Loss= 0.5814, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2370, Reward=-29.306717, Minibatch Loss= 0.6729, Training Accuracy= 0.852, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2060, Reward=-19.53981, Minibatch Loss= 0.7943, Training Accuracy= 0.836, Testing Acc= 0.719298, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2380, Reward=-22.840593, Minibatch Loss= 0.6842, Training Accuracy= 0.852, Testing Acc= 0.552632, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2070, Reward=-11.861025, Minibatch Loss= 0.5096, Training Accuracy= 0.891, Testing Acc= 0.657895, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2390, Reward=-23.856815, Minibatch Loss= 0.7499, Training Accuracy= 0.906, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2080, Reward=-17.563526, Minibatch Loss= 0.4920, Training Accuracy= 0.875, Testing Acc= 0.526316, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2400, Reward=-24.354937, Minibatch Loss= 0.6565, Training Accuracy= 0.883, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2090, Reward=-14.2103, Minibatch Loss= 0.5118, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2410, Reward=-17.113594, Minibatch Loss= 0.6383, Training Accuracy= 0.891, Testing Acc= 0.684211, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2100, Reward=-17.927952, Minibatch Loss= 0.4976, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2420, Reward=-19.03194, Minibatch Loss= 0.7003, Training Accuracy= 0.820, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2110, Reward=-17.519428, Minibatch Loss= 0.4714, Training Accuracy= 0.867, Testing Acc= 0.561404, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2430, Reward=-17.92351, Minibatch Loss= 0.5527, Training Accuracy= 0.898, Testing Acc= 0.675439, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2120, Reward=-15.927029, Minibatch Loss= 0.5983, Training Accuracy= 0.805, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2440, Reward=-14.409695, Minibatch Loss= 0.6076, Training Accuracy= 0.844, Testing Acc= 0.657895, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2130, Reward=-24.705147, Minibatch Loss= 0.5642, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2450, Reward=-19.342344, Minibatch Loss= 0.5910, Training Accuracy= 0.867, Testing Acc= 0.631579, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2140, Reward=-6.297702, Minibatch Loss= 0.6058, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2460, Reward=-26.224693, Minibatch Loss= 0.7293, Training Accuracy= 0.867, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2150, Reward=-11.409374, Minibatch Loss= 0.6039, Training Accuracy= 0.867, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2470, Reward=-28.246368, Minibatch Loss= 0.8016, Training Accuracy= 0.898, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2160, Reward=-19.635702, Minibatch Loss= 0.5417, Training Accuracy= 0.859, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2480, Reward=-22.843664, Minibatch Loss= 0.7735, Training Accuracy= 0.859, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2170, Reward=-12.725081, Minibatch Loss= 0.5712, Training Accuracy= 0.891, Testing Acc= 0.614035, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2490, Reward=-16.39653, Minibatch Loss= 0.7191, Training Accuracy= 0.852, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2180, Reward=-21.825312, Minibatch Loss= 0.5414, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2500, Reward=-16.22843, Minibatch Loss= 0.7675, Training Accuracy= 0.812, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2190, Reward=-20.39991, Minibatch Loss= 0.6037, Training Accuracy= 0.875, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2510, Reward=-24.988058, Minibatch Loss= 0.7251, Training Accuracy= 0.797, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2200, Reward=-15.408033, Minibatch Loss= 0.4262, Training Accuracy= 0.938, Testing Acc= 0.614035, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2520, Reward=-18.277012, Minibatch Loss= 0.6182, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2210, Reward=-15.97004, Minibatch Loss= 0.5384, Training Accuracy= 0.891, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2530, Reward=-17.794601, Minibatch Loss= 0.6129, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2540, Reward=-25.993492, Minibatch Loss= 0.7055, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2220, Reward=-22.950768, Minibatch Loss= 0.6441, Training Accuracy= 0.852, Testing Acc= 0.614035, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2230, Reward=-37.208298, Minibatch Loss= 0.6456, Training Accuracy= 0.883, Testing Acc= 0.614035, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2550, Reward=-22.630304, Minibatch Loss= 0.7507, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2240, Reward=-26.249344, Minibatch Loss= 0.6856, Training Accuracy= 0.844, Testing Acc= 0.596491, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2560, Reward=-15.779579, Minibatch Loss= 0.7447, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2250, Reward=-29.111279, Minibatch Loss= 0.7207, Training Accuracy= 0.844, Testing Acc= 0.517544, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2570, Reward=-9.679295, Minibatch Loss= 0.7448, Training Accuracy= 0.852, Testing Acc= 0.570175, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2580, Reward=-23.37355, Minibatch Loss= 0.7724, Training Accuracy= 0.836, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2260, Reward=-33.02072, Minibatch Loss= 0.6445, Training Accuracy= 0.891, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2590, Reward=-17.402603, Minibatch Loss= 0.7168, Training Accuracy= 0.898, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2270, Reward=-17.057568, Minibatch Loss= 0.6084, Training Accuracy= 0.883, Testing Acc= 0.526316, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2600, Reward=-22.197369, Minibatch Loss= 0.7617, Training Accuracy= 0.867, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2280, Reward=-25.228222, Minibatch Loss= 0.5105, Training Accuracy= 0.914, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2610, Reward=-16.176397, Minibatch Loss= 0.9180, Training Accuracy= 0.820, Testing Acc= 0.684211, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2290, Reward=-10.78867, Minibatch Loss= 0.5376, Training Accuracy= 0.867, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2620, Reward=-27.31609, Minibatch Loss= 0.7362, Training Accuracy= 0.836, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2300, Reward=-3.1778193, Minibatch Loss= 0.4224, Training Accuracy= 0.938, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2630, Reward=-23.415752, Minibatch Loss= 0.5792, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2310, Reward=-17.873152, Minibatch Loss= 0.5925, Training Accuracy= 0.852, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2640, Reward=-11.811201, Minibatch Loss= 0.6040, Training Accuracy= 0.867, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2320, Reward=-13.693835, Minibatch Loss= 0.5744, Training Accuracy= 0.914, Testing Acc= 0.543860, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2650, Reward=-18.74939, Minibatch Loss= 0.7166, Training Accuracy= 0.797, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2330, Reward=-16.220716, Minibatch Loss= 0.6018, Training Accuracy= 0.859, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2660, Reward=-16.004389, Minibatch Loss= 0.6396, Training Accuracy= 0.875, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2340, Reward=-14.603124, Minibatch Loss= 0.7934, Training Accuracy= 0.891, Testing Acc= 0.596491, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2670, Reward=-23.380976, Minibatch Loss= 0.8360, Training Accuracy= 0.812, Testing Acc= 0.614035, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2350, Reward=-15.318689, Minibatch Loss= 0.6690, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2680, Reward=-28.435066, Minibatch Loss= 0.6724, Training Accuracy= 0.844, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2360, Reward=-14.874202, Minibatch Loss= 0.6095, Training Accuracy= 0.906, Testing Acc= 0.561404, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2690, Reward=-26.140488, Minibatch Loss= 0.6320, Training Accuracy= 0.875, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2370, Reward=-21.26699, Minibatch Loss= 0.5955, Training Accuracy= 0.859, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2700, Reward=-30.257889, Minibatch Loss= 0.7914, Training Accuracy= 0.844, Testing Acc= 0.605263, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2380, Reward=-20.301744, Minibatch Loss= 0.5135, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2710, Reward=-21.914558, Minibatch Loss= 0.6873, Training Accuracy= 0.875, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2390, Reward=-17.745083, Minibatch Loss= 0.4788, Training Accuracy= 0.922, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2720, Reward=-28.535887, Minibatch Loss= 0.6397, Training Accuracy= 0.898, Testing Acc= 0.649123, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2400, Reward=-20.367031, Minibatch Loss= 0.6224, Training Accuracy= 0.828, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2730, Reward=-23.44251, Minibatch Loss= 0.9363, Training Accuracy= 0.758, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2410, Reward=-8.637501, Minibatch Loss= 0.5607, Training Accuracy= 0.906, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2740, Reward=-33.974697, Minibatch Loss= 0.7491, Training Accuracy= 0.828, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2420, Reward=-20.310867, Minibatch Loss= 0.5145, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2750, Reward=-28.57459, Minibatch Loss= 0.6558, Training Accuracy= 0.883, Testing Acc= 0.543860, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2430, Reward=-16.639383, Minibatch Loss= 0.5127, Training Accuracy= 0.891, Testing Acc= 0.535088, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2760, Reward=-14.136381, Minibatch Loss= 0.5326, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2440, Reward=-20.319109, Minibatch Loss= 0.8180, Training Accuracy= 0.711, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2770, Reward=-17.370977, Minibatch Loss= 0.4692, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2450, Reward=-33.449627, Minibatch Loss= 0.6966, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2780, Reward=-6.103097, Minibatch Loss= 0.4079, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2460, Reward=-20.431541, Minibatch Loss= 0.5477, Training Accuracy= 0.898, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2790, Reward=-9.517277, Minibatch Loss= 0.4696, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2470, Reward=0.3682518, Minibatch Loss= 0.4225, Training Accuracy= 0.914, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2800, Reward=-23.94869, Minibatch Loss= 0.6867, Training Accuracy= 0.828, Testing Acc= 0.596491, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2810, Reward=-28.914356, Minibatch Loss= 0.6867, Training Accuracy= 0.930, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2480, Reward=-9.385681, Minibatch Loss= 0.4038, Training Accuracy= 0.930, Testing Acc= 0.570175, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2820, Reward=-24.75806, Minibatch Loss= 0.9621, Training Accuracy= 0.742, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2490, Reward=-10.112466, Minibatch Loss= 0.5195, Training Accuracy= 0.891, Testing Acc= 0.508772, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2500, Reward=-16.530443, Minibatch Loss= 0.3654, Training Accuracy= 0.930, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2830, Reward=-22.234528, Minibatch Loss= 0.6501, Training Accuracy= 0.836, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2840, Reward=-23.031355, Minibatch Loss= 0.8408, Training Accuracy= 0.805, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2510, Reward=-14.1160145, Minibatch Loss= 0.5326, Training Accuracy= 0.852, Testing Acc= 0.526316, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2850, Reward=-24.58304, Minibatch Loss= 0.5744, Training Accuracy= 0.930, Testing Acc= 0.508772, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2520, Reward=-17.380035, Minibatch Loss= 0.4512, Training Accuracy= 0.891, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2860, Reward=-16.73093, Minibatch Loss= 0.5781, Training Accuracy= 0.875, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2530, Reward=-15.705737, Minibatch Loss= 0.4690, Training Accuracy= 0.883, Testing Acc= 0.500000, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2870, Reward=-25.277185, Minibatch Loss= 0.7777, Training Accuracy= 0.758, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2540, Reward=-19.633179, Minibatch Loss= 0.5038, Training Accuracy= 0.898, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2880, Reward=-30.183025, Minibatch Loss= 0.8749, Training Accuracy= 0.805, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2550, Reward=-12.410604, Minibatch Loss= 0.4153, Training Accuracy= 0.914, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2890, Reward=-28.887022, Minibatch Loss= 0.7701, Training Accuracy= 0.875, Testing Acc= 0.736842, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2560, Reward=-10.929781, Minibatch Loss= 0.4278, Training Accuracy= 0.867, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2900, Reward=-27.844254, Minibatch Loss= 0.6616, Training Accuracy= 0.891, Testing Acc= 0.710526, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2570, Reward=-17.231882, Minibatch Loss= 0.3336, Training Accuracy= 0.953, Testing Acc= 0.570175, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2910, Reward=-20.399036, Minibatch Loss= 0.6281, Training Accuracy= 0.859, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2580, Reward=-5.3416653, Minibatch Loss= 0.3763, Training Accuracy= 0.922, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2920, Reward=-20.562286, Minibatch Loss= 0.5764, Training Accuracy= 0.906, Testing Acc= 0.517544, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2590, Reward=-11.117208, Minibatch Loss= 0.3427, Training Accuracy= 0.977, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2930, Reward=-17.929575, Minibatch Loss= 0.8311, Training Accuracy= 0.750, Testing Acc= 0.500000, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 2600, Reward=-21.058308, Minibatch Loss= 0.4115, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2940, Reward=-22.856722, Minibatch Loss= 0.6593, Training Accuracy= 0.828, Testing Acc= 0.526316, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2610, Reward=-15.584545, Minibatch Loss= 0.3671, Training Accuracy= 0.938, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2950, Reward=-22.525076, Minibatch Loss= 0.6033, Training Accuracy= 0.922, Testing Acc= 0.587719, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2620, Reward=-21.738773, Minibatch Loss= 0.5015, Training Accuracy= 0.875, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2960, Reward=-18.687233, Minibatch Loss= 0.6198, Training Accuracy= 0.812, Testing Acc= 0.578947, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2630, Reward=-16.980614, Minibatch Loss= 0.3158, Training Accuracy= 0.984, Testing Acc= 0.543860, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2970, Reward=-28.193672, Minibatch Loss= 0.6208, Training Accuracy= 0.922, Testing Acc= 0.561404, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2640, Reward=-16.383644, Minibatch Loss= 0.4038, Training Accuracy= 0.891, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2980, Reward=-26.972542, Minibatch Loss= 0.7640, Training Accuracy= 0.773, Testing Acc= 0.666667, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Step 2650, Reward=-7.311115, Minibatch Loss= 0.3190, Training Accuracy= 0.961, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2990, Reward=-34.102734, Minibatch Loss= 0.8451, Training Accuracy= 0.828, Testing Acc= 0.684211, Max Final Accuracy=  0.815789, Max AUC=  0.874577, Max AP=  0.901719\n",
      "Optimization Finished!\n",
      "Testing Accuracy: 0.81578946\n",
      "Step 2660, Reward=-9.531818, Minibatch Loss= 0.8206, Training Accuracy= 0.836, Testing Acc= 0.552632, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2670, Reward=-29.919233, Minibatch Loss= 0.5437, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2680, Reward=-18.97409, Minibatch Loss= 0.4975, Training Accuracy= 0.883, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2690, Reward=-12.041051, Minibatch Loss= 0.4000, Training Accuracy= 0.891, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2700, Reward=-8.111198, Minibatch Loss= 0.3234, Training Accuracy= 0.930, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2710, Reward=-6.9259424, Minibatch Loss= 0.3780, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2720, Reward=-14.24675, Minibatch Loss= 0.3090, Training Accuracy= 0.961, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2730, Reward=-8.107285, Minibatch Loss= 0.2905, Training Accuracy= 0.953, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2740, Reward=-8.898514, Minibatch Loss= 0.2870, Training Accuracy= 0.969, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2750, Reward=-14.568042, Minibatch Loss= 0.5628, Training Accuracy= 0.883, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2760, Reward=-16.5965, Minibatch Loss= 0.5450, Training Accuracy= 0.938, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2770, Reward=-19.687592, Minibatch Loss= 0.5393, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2780, Reward=-13.063124, Minibatch Loss= 0.4082, Training Accuracy= 0.953, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2790, Reward=-12.904329, Minibatch Loss= 0.3211, Training Accuracy= 0.945, Testing Acc= 0.578947, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2800, Reward=-14.920483, Minibatch Loss= 0.3694, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2810, Reward=-12.936054, Minibatch Loss= 0.4182, Training Accuracy= 0.922, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2820, Reward=-6.158101, Minibatch Loss= 0.3586, Training Accuracy= 0.914, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2830, Reward=-10.549805, Minibatch Loss= 0.3938, Training Accuracy= 0.898, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2840, Reward=-16.278759, Minibatch Loss= 0.3650, Training Accuracy= 0.922, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2850, Reward=-12.8861, Minibatch Loss= 0.3520, Training Accuracy= 0.938, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2860, Reward=-11.988311, Minibatch Loss= 0.4005, Training Accuracy= 0.945, Testing Acc= 0.596491, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2870, Reward=-19.80556, Minibatch Loss= 0.4291, Training Accuracy= 0.938, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2880, Reward=-6.173996, Minibatch Loss= 0.4816, Training Accuracy= 0.898, Testing Acc= 0.605263, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2890, Reward=-17.290464, Minibatch Loss= 0.4012, Training Accuracy= 0.906, Testing Acc= 0.570175, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2900, Reward=-8.6031885, Minibatch Loss= 0.3626, Training Accuracy= 0.891, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2910, Reward=-11.879831, Minibatch Loss= 0.4277, Training Accuracy= 0.922, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2920, Reward=-13.342544, Minibatch Loss= 0.5671, Training Accuracy= 0.820, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2930, Reward=-8.670242, Minibatch Loss= 0.3394, Training Accuracy= 0.938, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2940, Reward=-13.52185, Minibatch Loss= 0.3089, Training Accuracy= 0.922, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2950, Reward=-8.59078, Minibatch Loss= 0.3408, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2960, Reward=-15.525906, Minibatch Loss= 0.3468, Training Accuracy= 0.953, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2970, Reward=-16.164618, Minibatch Loss= 0.3136, Training Accuracy= 0.953, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2980, Reward=-10.862545, Minibatch Loss= 0.3327, Training Accuracy= 0.906, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Step 2990, Reward=-6.8630877, Minibatch Loss= 0.3360, Training Accuracy= 0.938, Testing Acc= 0.587719, Max Final Accuracy=  0.833333, Max AUC=  0.877347, Max AP=  0.896824\n",
      "Optimization Finished!\n",
      "Testing Accuracy: 0.8333333\n"
     ]
    }
   ],
   "source": [
    "LRs = [0.0005]\n",
    "DSs = [1500]\n",
    "DRs = [0.9, 0.8]\n",
    "ACs = [0.00005, 0.0001]\n",
    "CRs = [0.0005, 0.001]\n",
    "\n",
    "pool = mp.Pool(2)\n",
    "pool.map(train, [(lr, ds, dr, rl_a, rl_c) for lr in LRs for ds in DSs for dr in DRs for rl_a in ACs for rl_c in CRs])\n",
    "pool.close()\n",
    "pool.join()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
