{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Model training\n",
    "\n",
    "This notebook can be used to train the models used for the replication experiment. The notebook makes heavy use of predefined configuration files that describe the parameter settings of each model. Pretrained models using these specific parameters are already available. Hence, retraining the models is not needed if you only wish to reproduce the replication experiment. \n",
    "\n",
    "If you do wish to perform the replication experiments with your own retrained models, it is not sufficient to only retrain the models with this notebook. To prevent the training script from accidentally overwriting the pretrained models, the trained models are saved in a different location than where the pretrained models are loaded from. \n",
    "\n",
    "**To replace the pretrained models in the replication study** you therefore need to copy the trained model from `checkpoints` to `Explanation/models/pretrained/<_model>/<_dataset>`, where \\_model and \\_dataset are defined as in the code below. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ExplanationEvaluation.configs.selector import Selector\n",
    "from ExplanationEvaluation.tasks.training import train_node, train_graph\n",
    "\n",
    "import torch\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset for which a model is trained\n",
    "_dataset = 'bashapes' # One of: bashapes, bacommunity, treecycles, treegrids, ba2motifs, mutag\n",
    "\n",
    "# Parameters below should only be changed if you want to run any of the experiments in the supplementary\n",
    "_folder = 'replication' # One of: replication, batchnorm\n",
    "_model = 'gnn' if _folder == 'replication' else 'ori'\n",
    "\n",
    "# Path of the configuration file holding the parameter settings of the selected model\n",
    "config_path = f\"./ExplanationEvaluation/configs/{_folder}/models/model_{_model}_{_dataset}.json\"\n",
    "\n",
    "# No Selector is built in this cell: the configuration is parsed where it is first used\n",
    "# (`Selector(config_path).args`), which avoids reading the file twice and keeps `config`\n",
    "# bound to a single kind of object.\n",
    "\n",
    "# True only when the 'extension' configuration folder is selected.\n",
    "# NOTE(review): 'extension' is not listed among the folder options above - confirm it is supported.\n",
    "extension = (_folder == 'extension')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the selected configuration file and keep only its arguments\n",
    "config = Selector(config_path).args\n",
    "\n",
    "# Seed the torch (CPU and CUDA) and NumPy random number generators\n",
    "# with the seed stored in the model configuration\n",
    "_seed = config.model.seed\n",
    "torch.manual_seed(_seed)\n",
    "torch.cuda.manual_seed(_seed)\n",
    "np.random.seed(_seed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading syn1 dataset\n",
      "NodeGCN(\n",
      "  (conv1): GCNConv(10, 20)\n",
      "  (relu1): ReLU()\n",
      "  (conv2): GCNConv(20, 20)\n",
      "  (relu2): ReLU()\n",
      "  (conv3): GCNConv(20, 20)\n",
      "  (relu3): ReLU()\n",
      "  (lin): Linear(in_features=60, out_features=4, bias=True)\n",
      ")\n",
      "Epoch: 0, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.4170\n",
      "Val improved\n",
      "Epoch: 1, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.4121\n",
      "Epoch: 2, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.4074\n",
      "Epoch: 3, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.4039\n",
      "Epoch: 4, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.4007\n",
      "Epoch: 5, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.3975\n",
      "Epoch: 6, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.3945\n",
      "Epoch: 7, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.3916\n",
      "Epoch: 8, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.3887\n",
      "Epoch: 9, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.3859\n",
      "Epoch: 10, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.3832\n",
      "Epoch: 11, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.3804\n",
      "Epoch: 12, train_acc: 0.2161, val_acc: 0.3000, train_loss: 1.3777\n",
      "Epoch: 13, train_acc: 0.2304, val_acc: 0.3000, train_loss: 1.3750\n",
      "Epoch: 14, train_acc: 0.3893, val_acc: 0.3857, train_loss: 1.3724\n",
      "Val improved\n",
      "Epoch: 15, train_acc: 0.4161, val_acc: 0.3857, train_loss: 1.3698\n",
      "Epoch: 16, train_acc: 0.4232, val_acc: 0.3143, train_loss: 1.3672\n",
      "Epoch: 17, train_acc: 0.4446, val_acc: 0.3143, train_loss: 1.3646\n",
      "Epoch: 18, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3619\n",
      "Epoch: 19, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3591\n",
      "Epoch: 20, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3564\n",
      "Epoch: 21, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3536\n",
      "Epoch: 22, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3507\n",
      "Epoch: 23, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3477\n",
      "Epoch: 24, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3448\n",
      "Epoch: 25, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3417\n",
      "Epoch: 26, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3385\n",
      "Epoch: 27, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3353\n",
      "Epoch: 28, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3321\n",
      "Epoch: 29, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3288\n",
      "Epoch: 30, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3257\n",
      "Epoch: 31, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3233\n",
      "Epoch: 32, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3208\n",
      "Epoch: 33, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3181\n",
      "Epoch: 34, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3154\n",
      "Epoch: 35, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3128\n",
      "Epoch: 36, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3105\n",
      "Epoch: 37, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3084\n",
      "Epoch: 38, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3062\n",
      "Epoch: 39, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3043\n",
      "Epoch: 40, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3024\n",
      "Epoch: 41, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.3004\n",
      "Epoch: 42, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2984\n",
      "Epoch: 43, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2964\n",
      "Epoch: 44, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2946\n",
      "Epoch: 45, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2929\n",
      "Epoch: 46, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2912\n",
      "Epoch: 47, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2896\n",
      "Epoch: 48, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2880\n",
      "Epoch: 49, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2867\n",
      "Epoch: 50, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2852\n",
      "Epoch: 51, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2839\n",
      "Epoch: 52, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2826\n",
      "Epoch: 53, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2813\n",
      "Epoch: 54, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2804\n",
      "Epoch: 55, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2794\n",
      "Epoch: 56, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2785\n",
      "Epoch: 57, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2776\n",
      "Epoch: 58, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2767\n",
      "Epoch: 59, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2760\n",
      "Epoch: 60, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2753\n",
      "Epoch: 61, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2746\n",
      "Epoch: 62, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2739\n",
      "Epoch: 63, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2733\n",
      "Epoch: 64, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2726\n",
      "Epoch: 65, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2720\n",
      "Epoch: 66, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2715\n",
      "Epoch: 67, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2710\n",
      "Epoch: 68, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2704\n",
      "Epoch: 69, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2698\n",
      "Epoch: 70, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2692\n",
      "Epoch: 71, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2688\n",
      "Epoch: 72, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2682\n",
      "Epoch: 73, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2676\n",
      "Epoch: 74, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2669\n",
      "Epoch: 75, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2664\n",
      "Epoch: 76, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2659\n",
      "Epoch: 77, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2654\n",
      "Epoch: 78, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2647\n",
      "Epoch: 79, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2639\n",
      "Epoch: 80, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2631\n",
      "Epoch: 81, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2623\n",
      "Epoch: 82, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2617\n",
      "Epoch: 83, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2613\n",
      "Epoch: 84, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2605\n",
      "Epoch: 85, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2598\n",
      "Epoch: 86, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2592\n",
      "Epoch: 87, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2585\n",
      "Epoch: 88, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2577\n",
      "Epoch: 89, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2567\n",
      "Epoch: 90, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2557\n",
      "Epoch: 91, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2546\n",
      "Epoch: 92, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2536\n",
      "Epoch: 93, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2524\n",
      "Epoch: 94, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2512\n",
      "Epoch: 95, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2498\n",
      "Epoch: 96, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2485\n",
      "Epoch: 97, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2470\n",
      "Epoch: 98, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2451\n",
      "Epoch: 99, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2425\n",
      "Epoch: 100, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2387\n",
      "Epoch: 101, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2364\n",
      "Epoch: 102, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2339\n",
      "Epoch: 103, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2306\n",
      "Epoch: 104, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2262\n",
      "Epoch: 105, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2225\n",
      "Epoch: 106, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2173\n",
      "Epoch: 107, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2105\n",
      "Epoch: 108, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.2048\n",
      "Epoch: 109, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.1974\n",
      "Epoch: 110, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.1887\n",
      "Epoch: 111, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.1803\n",
      "Epoch: 112, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.1702\n",
      "Epoch: 113, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.1591\n",
      "Epoch: 114, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.1481\n",
      "Epoch: 115, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.1352\n",
      "Epoch: 116, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.1232\n",
      "Epoch: 117, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.1087\n",
      "Epoch: 118, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.0955\n",
      "Epoch: 119, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.0809\n",
      "Epoch: 120, train_acc: 0.4464, val_acc: 0.3143, train_loss: 1.0672\n",
      "Epoch: 121, train_acc: 0.5357, val_acc: 0.4714, train_loss: 1.0542\n",
      "Val improved\n",
      "Epoch: 122, train_acc: 0.6482, val_acc: 0.5857, train_loss: 1.0412\n",
      "Val improved\n",
      "Epoch: 123, train_acc: 0.6518, val_acc: 0.6000, train_loss: 1.0293\n",
      "Val improved\n",
      "Epoch: 124, train_acc: 0.6536, val_acc: 0.6000, train_loss: 1.0174\n",
      "Epoch: 125, train_acc: 0.6607, val_acc: 0.6000, train_loss: 1.0069\n",
      "Epoch: 126, train_acc: 0.6625, val_acc: 0.6286, train_loss: 0.9954\n",
      "Val improved\n",
      "Epoch: 127, train_acc: 0.6661, val_acc: 0.6286, train_loss: 0.9857\n",
      "Epoch: 128, train_acc: 0.6643, val_acc: 0.6000, train_loss: 0.9752\n",
      "Epoch: 129, train_acc: 0.6714, val_acc: 0.6000, train_loss: 0.9660\n",
      "Epoch: 130, train_acc: 0.6804, val_acc: 0.6143, train_loss: 0.9580\n",
      "Epoch: 131, train_acc: 0.6768, val_acc: 0.6143, train_loss: 0.9499\n",
      "Epoch: 132, train_acc: 0.6696, val_acc: 0.6286, train_loss: 0.9409\n",
      "Epoch: 133, train_acc: 0.6696, val_acc: 0.6286, train_loss: 0.9341\n",
      "Epoch: 134, train_acc: 0.6696, val_acc: 0.6286, train_loss: 0.9270\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 135, train_acc: 0.6768, val_acc: 0.6286, train_loss: 0.9186\n",
      "Epoch: 136, train_acc: 0.6821, val_acc: 0.6143, train_loss: 0.9110\n",
      "Epoch: 137, train_acc: 0.6714, val_acc: 0.6000, train_loss: 0.9054\n",
      "Epoch: 138, train_acc: 0.6732, val_acc: 0.6286, train_loss: 0.8984\n",
      "Epoch: 139, train_acc: 0.6821, val_acc: 0.6143, train_loss: 0.8919\n",
      "Epoch: 140, train_acc: 0.6804, val_acc: 0.6429, train_loss: 0.8870\n",
      "Val improved\n",
      "Epoch: 141, train_acc: 0.6679, val_acc: 0.6286, train_loss: 0.8806\n",
      "Epoch: 142, train_acc: 0.6661, val_acc: 0.6286, train_loss: 0.8761\n",
      "Epoch: 143, train_acc: 0.6893, val_acc: 0.6286, train_loss: 0.8707\n",
      "Epoch: 144, train_acc: 0.7107, val_acc: 0.6571, train_loss: 0.8647\n",
      "Val improved\n",
      "Epoch: 145, train_acc: 0.7000, val_acc: 0.6286, train_loss: 0.8608\n",
      "Epoch: 146, train_acc: 0.6875, val_acc: 0.6286, train_loss: 0.8553\n",
      "Epoch: 147, train_acc: 0.6893, val_acc: 0.6286, train_loss: 0.8506\n",
      "Epoch: 148, train_acc: 0.6982, val_acc: 0.6286, train_loss: 0.8460\n",
      "Epoch: 149, train_acc: 0.7071, val_acc: 0.6429, train_loss: 0.8422\n",
      "Epoch: 150, train_acc: 0.7071, val_acc: 0.6571, train_loss: 0.8378\n",
      "Epoch: 151, train_acc: 0.7036, val_acc: 0.6571, train_loss: 0.8348\n",
      "Epoch: 152, train_acc: 0.7071, val_acc: 0.6429, train_loss: 0.8309\n",
      "Epoch: 153, train_acc: 0.7107, val_acc: 0.6429, train_loss: 0.8266\n",
      "Epoch: 154, train_acc: 0.7179, val_acc: 0.6571, train_loss: 0.8229\n",
      "Epoch: 155, train_acc: 0.7071, val_acc: 0.6571, train_loss: 0.8190\n",
      "Epoch: 156, train_acc: 0.7232, val_acc: 0.6571, train_loss: 0.8164\n",
      "Epoch: 157, train_acc: 0.7571, val_acc: 0.7857, train_loss: 0.8122\n",
      "Val improved\n",
      "Epoch: 158, train_acc: 0.8179, val_acc: 0.8000, train_loss: 0.8100\n",
      "Val improved\n",
      "Epoch: 159, train_acc: 0.7393, val_acc: 0.7000, train_loss: 0.8069\n",
      "Epoch: 160, train_acc: 0.7232, val_acc: 0.6714, train_loss: 0.8019\n",
      "Epoch: 161, train_acc: 0.7589, val_acc: 0.7571, train_loss: 0.7997\n",
      "Epoch: 162, train_acc: 0.7500, val_acc: 0.7286, train_loss: 0.7966\n",
      "Epoch: 163, train_acc: 0.7911, val_acc: 0.8143, train_loss: 0.7928\n",
      "Val improved\n",
      "Epoch: 164, train_acc: 0.7929, val_acc: 0.8143, train_loss: 0.7907\n",
      "Epoch: 165, train_acc: 0.7214, val_acc: 0.6571, train_loss: 0.7876\n",
      "Epoch: 166, train_acc: 0.7321, val_acc: 0.6857, train_loss: 0.7845\n",
      "Epoch: 167, train_acc: 0.7375, val_acc: 0.7143, train_loss: 0.7812\n",
      "Epoch: 168, train_acc: 0.7214, val_acc: 0.6714, train_loss: 0.7799\n",
      "Epoch: 169, train_acc: 0.7482, val_acc: 0.7286, train_loss: 0.7758\n",
      "Epoch: 170, train_acc: 0.7518, val_acc: 0.7286, train_loss: 0.7755\n",
      "Epoch: 171, train_acc: 0.7589, val_acc: 0.7429, train_loss: 0.7724\n",
      "Epoch: 172, train_acc: 0.7179, val_acc: 0.6714, train_loss: 0.7696\n",
      "Epoch: 173, train_acc: 0.7143, val_acc: 0.6571, train_loss: 0.7672\n",
      "Epoch: 174, train_acc: 0.7643, val_acc: 0.8000, train_loss: 0.7641\n",
      "Epoch: 175, train_acc: 0.7625, val_acc: 0.7714, train_loss: 0.7637\n",
      "Epoch: 176, train_acc: 0.7143, val_acc: 0.6571, train_loss: 0.7615\n",
      "Epoch: 177, train_acc: 0.7107, val_acc: 0.6429, train_loss: 0.7579\n",
      "Epoch: 178, train_acc: 0.7411, val_acc: 0.6857, train_loss: 0.7562\n",
      "Epoch: 179, train_acc: 0.8143, val_acc: 0.8286, train_loss: 0.7530\n",
      "Val improved\n",
      "Epoch: 180, train_acc: 0.7786, val_acc: 0.7857, train_loss: 0.7528\n",
      "Epoch: 181, train_acc: 0.7250, val_acc: 0.6714, train_loss: 0.7501\n",
      "Epoch: 182, train_acc: 0.7107, val_acc: 0.6571, train_loss: 0.7475\n",
      "Epoch: 183, train_acc: 0.7143, val_acc: 0.6571, train_loss: 0.7457\n",
      "Epoch: 184, train_acc: 0.7464, val_acc: 0.7286, train_loss: 0.7439\n",
      "Epoch: 185, train_acc: 0.7339, val_acc: 0.6857, train_loss: 0.7413\n",
      "Epoch: 186, train_acc: 0.7196, val_acc: 0.6571, train_loss: 0.7388\n",
      "Epoch: 187, train_acc: 0.7286, val_acc: 0.6857, train_loss: 0.7376\n",
      "Epoch: 188, train_acc: 0.7321, val_acc: 0.6857, train_loss: 0.7359\n",
      "Epoch: 189, train_acc: 0.7286, val_acc: 0.6857, train_loss: 0.7331\n",
      "Epoch: 190, train_acc: 0.7196, val_acc: 0.6714, train_loss: 0.7308\n",
      "Epoch: 191, train_acc: 0.7179, val_acc: 0.6714, train_loss: 0.7301\n",
      "Epoch: 192, train_acc: 0.7482, val_acc: 0.7000, train_loss: 0.7274\n",
      "Epoch: 193, train_acc: 0.7804, val_acc: 0.8143, train_loss: 0.7263\n",
      "Epoch: 194, train_acc: 0.7821, val_acc: 0.8143, train_loss: 0.7245\n",
      "Epoch: 195, train_acc: 0.7304, val_acc: 0.6857, train_loss: 0.7228\n",
      "Epoch: 196, train_acc: 0.7250, val_acc: 0.6714, train_loss: 0.7208\n",
      "Epoch: 197, train_acc: 0.7357, val_acc: 0.6857, train_loss: 0.7181\n",
      "Epoch: 198, train_acc: 0.7607, val_acc: 0.7571, train_loss: 0.7162\n",
      "Epoch: 199, train_acc: 0.7304, val_acc: 0.6857, train_loss: 0.7136\n",
      "Epoch: 200, train_acc: 0.7179, val_acc: 0.6714, train_loss: 0.7124\n",
      "Epoch: 201, train_acc: 0.7321, val_acc: 0.6857, train_loss: 0.7122\n",
      "Epoch: 202, train_acc: 0.7946, val_acc: 0.8143, train_loss: 0.7095\n",
      "Epoch: 203, train_acc: 0.8571, val_acc: 0.8714, train_loss: 0.7075\n",
      "Val improved\n",
      "Epoch: 204, train_acc: 0.7786, val_acc: 0.8143, train_loss: 0.7061\n",
      "Epoch: 205, train_acc: 0.7339, val_acc: 0.7000, train_loss: 0.7032\n",
      "Epoch: 206, train_acc: 0.7446, val_acc: 0.7286, train_loss: 0.7018\n",
      "Epoch: 207, train_acc: 0.7750, val_acc: 0.8143, train_loss: 0.6988\n",
      "Epoch: 208, train_acc: 0.7482, val_acc: 0.6857, train_loss: 0.6979\n",
      "Epoch: 209, train_acc: 0.7286, val_acc: 0.6857, train_loss: 0.6983\n",
      "Epoch: 210, train_acc: 0.7375, val_acc: 0.7000, train_loss: 0.6948\n",
      "Epoch: 211, train_acc: 0.7679, val_acc: 0.8143, train_loss: 0.6939\n",
      "Epoch: 212, train_acc: 0.8536, val_acc: 0.8714, train_loss: 0.6916\n",
      "Epoch: 213, train_acc: 0.8571, val_acc: 0.8714, train_loss: 0.6896\n",
      "Epoch: 214, train_acc: 0.7982, val_acc: 0.8286, train_loss: 0.6871\n",
      "Epoch: 215, train_acc: 0.7732, val_acc: 0.8143, train_loss: 0.6855\n",
      "Epoch: 216, train_acc: 0.7768, val_acc: 0.8143, train_loss: 0.6834\n",
      "Epoch: 217, train_acc: 0.7786, val_acc: 0.8143, train_loss: 0.6836\n",
      "Epoch: 218, train_acc: 0.8196, val_acc: 0.8286, train_loss: 0.6814\n",
      "Epoch: 219, train_acc: 0.8429, val_acc: 0.8714, train_loss: 0.6773\n",
      "Epoch: 220, train_acc: 0.8554, val_acc: 0.8714, train_loss: 0.6792\n",
      "Epoch: 221, train_acc: 0.8571, val_acc: 0.8857, train_loss: 0.6765\n",
      "Val improved\n",
      "Epoch: 222, train_acc: 0.8571, val_acc: 0.9000, train_loss: 0.6741\n",
      "Val improved\n",
      "Epoch: 223, train_acc: 0.8625, val_acc: 0.9000, train_loss: 0.6723\n",
      "Epoch: 224, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6707\n",
      "Epoch: 225, train_acc: 0.8518, val_acc: 0.8714, train_loss: 0.6705\n",
      "Epoch: 226, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6678\n",
      "Epoch: 227, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6670\n",
      "Epoch: 228, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6658\n",
      "Epoch: 229, train_acc: 0.8589, val_acc: 0.9000, train_loss: 0.6638\n",
      "Epoch: 230, train_acc: 0.8500, val_acc: 0.8857, train_loss: 0.6606\n",
      "Epoch: 231, train_acc: 0.8500, val_acc: 0.8857, train_loss: 0.6619\n",
      "Epoch: 232, train_acc: 0.8554, val_acc: 0.8857, train_loss: 0.6610\n",
      "Epoch: 233, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6578\n",
      "Epoch: 234, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6569\n",
      "Epoch: 235, train_acc: 0.8554, val_acc: 0.9000, train_loss: 0.6529\n",
      "Epoch: 236, train_acc: 0.8554, val_acc: 0.9000, train_loss: 0.6548\n",
      "Epoch: 237, train_acc: 0.8589, val_acc: 0.9000, train_loss: 0.6534\n",
      "Epoch: 238, train_acc: 0.8625, val_acc: 0.9000, train_loss: 0.6509\n",
      "Epoch: 239, train_acc: 0.8589, val_acc: 0.9000, train_loss: 0.6508\n",
      "Epoch: 240, train_acc: 0.8536, val_acc: 0.8857, train_loss: 0.6486\n",
      "Epoch: 241, train_acc: 0.8518, val_acc: 0.8857, train_loss: 0.6482\n",
      "Epoch: 242, train_acc: 0.8625, val_acc: 0.9000, train_loss: 0.6463\n",
      "Epoch: 243, train_acc: 0.8625, val_acc: 0.9000, train_loss: 0.6493\n",
      "Epoch: 244, train_acc: 0.8571, val_acc: 0.9000, train_loss: 0.6470\n",
      "Epoch: 245, train_acc: 0.8571, val_acc: 0.9000, train_loss: 0.6466\n",
      "Epoch: 246, train_acc: 0.8589, val_acc: 0.9000, train_loss: 0.6434\n",
      "Epoch: 247, train_acc: 0.8589, val_acc: 0.9000, train_loss: 0.6399\n",
      "Epoch: 248, train_acc: 0.8589, val_acc: 0.9000, train_loss: 0.6387\n",
      "Epoch: 249, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6379\n",
      "Epoch: 250, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6361\n",
      "Epoch: 251, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6335\n",
      "Epoch: 252, train_acc: 0.8571, val_acc: 0.9000, train_loss: 0.6333\n",
      "Epoch: 253, train_acc: 0.8554, val_acc: 0.9000, train_loss: 0.6344\n",
      "Epoch: 254, train_acc: 0.8571, val_acc: 0.9000, train_loss: 0.6335\n",
      "Epoch: 255, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6317\n",
      "Epoch: 256, train_acc: 0.8625, val_acc: 0.9000, train_loss: 0.6299\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 257, train_acc: 0.8536, val_acc: 0.9000, train_loss: 0.6279\n",
      "Epoch: 258, train_acc: 0.8536, val_acc: 0.9000, train_loss: 0.6319\n",
      "Epoch: 259, train_acc: 0.8589, val_acc: 0.9000, train_loss: 0.6302\n",
      "Epoch: 260, train_acc: 0.8589, val_acc: 0.9000, train_loss: 0.6233\n",
      "Epoch: 261, train_acc: 0.8571, val_acc: 0.9000, train_loss: 0.6224\n",
      "Epoch: 262, train_acc: 0.8571, val_acc: 0.9000, train_loss: 0.6207\n",
      "Epoch: 263, train_acc: 0.8571, val_acc: 0.9000, train_loss: 0.6219\n",
      "Epoch: 264, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6180\n",
      "Epoch: 265, train_acc: 0.8625, val_acc: 0.9000, train_loss: 0.6220\n",
      "Epoch: 266, train_acc: 0.8625, val_acc: 0.9000, train_loss: 0.6216\n",
      "Epoch: 267, train_acc: 0.8589, val_acc: 0.9000, train_loss: 0.6186\n",
      "Epoch: 268, train_acc: 0.8500, val_acc: 0.9000, train_loss: 0.6146\n",
      "Epoch: 269, train_acc: 0.8536, val_acc: 0.9000, train_loss: 0.6152\n",
      "Epoch: 270, train_acc: 0.8554, val_acc: 0.9000, train_loss: 0.6146\n",
      "Epoch: 271, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6114\n",
      "Epoch: 272, train_acc: 0.8625, val_acc: 0.9000, train_loss: 0.6089\n",
      "Epoch: 273, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.6099\n",
      "Epoch: 274, train_acc: 0.8589, val_acc: 0.9000, train_loss: 0.6080\n",
      "Epoch: 275, train_acc: 0.8536, val_acc: 0.9000, train_loss: 0.6071\n",
      "Epoch: 276, train_acc: 0.8518, val_acc: 0.9000, train_loss: 0.6057\n",
      "Epoch: 277, train_acc: 0.8589, val_acc: 0.9000, train_loss: 0.6049\n",
      "Epoch: 278, train_acc: 0.8625, val_acc: 0.9000, train_loss: 0.6028\n",
      "Epoch: 279, train_acc: 0.8982, val_acc: 0.9143, train_loss: 0.6007\n",
      "Val improved\n",
      "Epoch: 280, train_acc: 0.8821, val_acc: 0.9143, train_loss: 0.6038\n",
      "Epoch: 281, train_acc: 0.8625, val_acc: 0.9000, train_loss: 0.6016\n",
      "Epoch: 282, train_acc: 0.8625, val_acc: 0.9000, train_loss: 0.6029\n",
      "Epoch: 283, train_acc: 0.8679, val_acc: 0.9143, train_loss: 0.6011\n",
      "Epoch: 284, train_acc: 0.8821, val_acc: 0.9143, train_loss: 0.5957\n",
      "Epoch: 285, train_acc: 0.8589, val_acc: 0.9000, train_loss: 0.5955\n",
      "Epoch: 286, train_acc: 0.9054, val_acc: 0.9143, train_loss: 0.5931\n",
      "Epoch: 287, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.5934\n",
      "Epoch: 288, train_acc: 0.8946, val_acc: 0.9143, train_loss: 0.5914\n",
      "Epoch: 289, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.5929\n",
      "Epoch: 290, train_acc: 0.8875, val_acc: 0.9143, train_loss: 0.5921\n",
      "Epoch: 291, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5918\n",
      "Val improved\n",
      "Epoch: 292, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.5891\n",
      "Epoch: 293, train_acc: 0.8893, val_acc: 0.9143, train_loss: 0.5871\n",
      "Epoch: 294, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5847\n",
      "Epoch: 295, train_acc: 0.9000, val_acc: 0.9143, train_loss: 0.5851\n",
      "Epoch: 296, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5826\n",
      "Epoch: 297, train_acc: 0.8964, val_acc: 0.9143, train_loss: 0.5797\n",
      "Epoch: 298, train_acc: 0.9571, val_acc: 0.9714, train_loss: 0.5788\n",
      "Epoch: 299, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5823\n",
      "Epoch: 300, train_acc: 0.8607, val_acc: 0.9000, train_loss: 0.5800\n",
      "Epoch: 301, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5828\n",
      "Epoch: 302, train_acc: 0.9571, val_acc: 0.9714, train_loss: 0.5789\n",
      "Epoch: 303, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5787\n",
      "Epoch: 304, train_acc: 0.8839, val_acc: 0.9143, train_loss: 0.5811\n",
      "Epoch: 305, train_acc: 0.8946, val_acc: 0.9143, train_loss: 0.5800\n",
      "Epoch: 306, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5748\n",
      "Epoch: 307, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5821\n",
      "Epoch: 308, train_acc: 0.8804, val_acc: 0.9000, train_loss: 0.5800\n",
      "Epoch: 309, train_acc: 0.8875, val_acc: 0.9143, train_loss: 0.5734\n",
      "Epoch: 310, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5727\n",
      "Epoch: 311, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5714\n",
      "Epoch: 312, train_acc: 0.9571, val_acc: 0.9714, train_loss: 0.5680\n",
      "Epoch: 313, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5684\n",
      "Epoch: 314, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5709\n",
      "Epoch: 315, train_acc: 0.9518, val_acc: 0.9714, train_loss: 0.5658\n",
      "Epoch: 316, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5679\n",
      "Epoch: 317, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5725\n",
      "Epoch: 318, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5635\n",
      "Epoch: 319, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5654\n",
      "Epoch: 320, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5647\n",
      "Epoch: 321, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5619\n",
      "Epoch: 322, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5625\n",
      "Epoch: 323, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5603\n",
      "Epoch: 324, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5595\n",
      "Epoch: 325, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5598\n",
      "Epoch: 326, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5544\n",
      "Epoch: 327, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5575\n",
      "Epoch: 328, train_acc: 0.9107, val_acc: 0.9143, train_loss: 0.5590\n",
      "Epoch: 329, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5569\n",
      "Epoch: 330, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5549\n",
      "Epoch: 331, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5563\n",
      "Epoch: 332, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5548\n",
      "Epoch: 333, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5522\n",
      "Epoch: 334, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5495\n",
      "Epoch: 335, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5571\n",
      "Epoch: 336, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5555\n",
      "Epoch: 337, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5474\n",
      "Epoch: 338, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5455\n",
      "Epoch: 339, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5461\n",
      "Epoch: 340, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5426\n",
      "Epoch: 341, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5456\n",
      "Epoch: 342, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5436\n",
      "Epoch: 343, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5480\n",
      "Epoch: 344, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5501\n",
      "Epoch: 345, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5420\n",
      "Epoch: 346, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5407\n",
      "Epoch: 347, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5428\n",
      "Epoch: 348, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5378\n",
      "Epoch: 349, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5404\n",
      "Epoch: 350, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5420\n",
      "Epoch: 351, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5353\n",
      "Epoch: 352, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5343\n",
      "Epoch: 353, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5315\n",
      "Epoch: 354, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5316\n",
      "Epoch: 355, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5293\n",
      "Epoch: 356, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5301\n",
      "Epoch: 357, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5281\n",
      "Epoch: 358, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5298\n",
      "Epoch: 359, train_acc: 0.9589, val_acc: 0.9714, train_loss: 0.5268\n",
      "Epoch: 360, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5260\n",
      "Epoch: 361, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5232\n",
      "Epoch: 362, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5281\n",
      "Epoch: 363, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5281\n",
      "Epoch: 364, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5240\n",
      "Epoch: 365, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5261\n",
      "Epoch: 366, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5227\n",
      "Epoch: 367, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5254\n",
      "Epoch: 368, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5264\n",
      "Epoch: 369, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5212\n",
      "Epoch: 370, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5162\n",
      "Epoch: 371, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5267\n",
      "Epoch: 372, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5247\n",
      "Epoch: 373, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5130\n",
      "Epoch: 374, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5165\n",
      "Epoch: 375, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5160\n",
      "Epoch: 376, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5128\n",
      "Epoch: 377, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5102\n",
      "Epoch: 378, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5128\n",
      "Epoch: 379, train_acc: 0.9589, val_acc: 0.9571, train_loss: 0.5129\n",
      "Epoch: 380, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5142\n",
      "Epoch: 381, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5099\n",
      "Epoch: 382, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5128\n",
      "Epoch: 383, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5100\n",
      "Epoch: 384, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5180\n",
      "Epoch: 385, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5116\n",
      "Epoch: 386, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5028\n",
      "Epoch: 387, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.5038\n",
      "Epoch: 388, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5041\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 389, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4987\n",
      "Epoch: 390, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5088\n",
      "Epoch: 391, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5109\n",
      "Epoch: 392, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5023\n",
      "Epoch: 393, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.5022\n",
      "Epoch: 394, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.5007\n",
      "Epoch: 395, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.4966\n",
      "Epoch: 396, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.4972\n",
      "Epoch: 397, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4947\n",
      "Epoch: 398, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4950\n",
      "Epoch: 399, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.4939\n",
      "Epoch: 400, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.4925\n",
      "Epoch: 401, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4923\n",
      "Epoch: 402, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4897\n",
      "Epoch: 403, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4894\n",
      "Epoch: 404, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4875\n",
      "Epoch: 405, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.4865\n",
      "Epoch: 406, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4857\n",
      "Epoch: 407, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4839\n",
      "Epoch: 408, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.4833\n",
      "Epoch: 409, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.4859\n",
      "Epoch: 410, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4819\n",
      "Epoch: 411, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4849\n",
      "Epoch: 412, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4829\n",
      "Epoch: 413, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4850\n",
      "Epoch: 414, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4810\n",
      "Epoch: 415, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4835\n",
      "Epoch: 416, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.4796\n",
      "Epoch: 417, train_acc: 0.9607, val_acc: 0.9571, train_loss: 0.4825\n",
      "Epoch: 418, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4814\n",
      "Epoch: 419, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4782\n",
      "Epoch: 420, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4746\n",
      "Epoch: 421, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4768\n",
      "Epoch: 422, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4750\n",
      "Epoch: 423, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4767\n",
      "Epoch: 424, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4742\n",
      "Epoch: 425, train_acc: 0.9589, val_acc: 0.9571, train_loss: 0.4765\n",
      "Epoch: 426, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.4747\n",
      "Epoch: 427, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4705\n",
      "Epoch: 428, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4693\n",
      "Epoch: 429, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4678\n",
      "Epoch: 430, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4648\n",
      "Epoch: 431, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4733\n",
      "Epoch: 432, train_acc: 0.9589, val_acc: 0.9571, train_loss: 0.4709\n",
      "Epoch: 433, train_acc: 0.9607, val_acc: 0.9714, train_loss: 0.4681\n",
      "Epoch: 434, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4675\n",
      "Epoch: 435, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4674\n",
      "Epoch: 436, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4613\n",
      "Epoch: 437, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4665\n",
      "Epoch: 438, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4687\n",
      "Epoch: 439, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4689\n",
      "Epoch: 440, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4631\n",
      "Epoch: 441, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4636\n",
      "Epoch: 442, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4660\n",
      "Epoch: 443, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4598\n",
      "Epoch: 444, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4567\n",
      "Epoch: 445, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.4597\n",
      "Epoch: 446, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4601\n",
      "Epoch: 447, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4601\n",
      "Epoch: 448, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4525\n",
      "Epoch: 449, train_acc: 0.9625, val_acc: 0.9571, train_loss: 0.4645\n",
      "Epoch: 450, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4653\n",
      "Epoch: 451, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4626\n",
      "Epoch: 452, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4591\n",
      "Epoch: 453, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4470\n",
      "Epoch: 454, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.4581\n",
      "Epoch: 455, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.4634\n",
      "Epoch: 456, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4601\n",
      "Epoch: 457, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4508\n",
      "Epoch: 458, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4467\n",
      "Epoch: 459, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4525\n",
      "Epoch: 460, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4508\n",
      "Epoch: 461, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.4498\n",
      "Epoch: 462, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4435\n",
      "Epoch: 463, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4469\n",
      "Epoch: 464, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4397\n",
      "Epoch: 465, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4484\n",
      "Epoch: 466, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.4460\n",
      "Epoch: 467, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.4473\n",
      "Epoch: 468, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4437\n",
      "Epoch: 469, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4416\n",
      "Epoch: 470, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4407\n",
      "Epoch: 471, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4394\n",
      "Epoch: 472, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4367\n",
      "Epoch: 473, train_acc: 0.9643, val_acc: 0.9571, train_loss: 0.4377\n",
      "Epoch: 474, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4356\n",
      "Epoch: 475, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4375\n",
      "Epoch: 476, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4349\n",
      "Epoch: 477, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4324\n",
      "Epoch: 478, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.4318\n",
      "Epoch: 479, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4345\n",
      "Epoch: 480, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4341\n",
      "Epoch: 481, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4305\n",
      "Epoch: 482, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4280\n",
      "Epoch: 483, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.4335\n",
      "Epoch: 484, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4321\n",
      "Epoch: 485, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4246\n",
      "Epoch: 486, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4228\n",
      "Epoch: 487, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4304\n",
      "Epoch: 488, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.4273\n",
      "Epoch: 489, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4267\n",
      "Epoch: 490, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.4255\n",
      "Epoch: 491, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4246\n",
      "Epoch: 492, train_acc: 0.9625, val_acc: 0.9571, train_loss: 0.4223\n",
      "Epoch: 493, train_acc: 0.9625, val_acc: 0.9571, train_loss: 0.4259\n",
      "Epoch: 494, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.4245\n",
      "Epoch: 495, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4239\n",
      "Epoch: 496, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.4241\n",
      "Epoch: 497, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4193\n",
      "Epoch: 498, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.4172\n",
      "Epoch: 499, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.4222\n",
      "Epoch: 500, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.4219\n",
      "Epoch: 501, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.4146\n",
      "Epoch: 502, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.4128\n",
      "Epoch: 503, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4170\n",
      "Epoch: 504, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.4117\n",
      "Epoch: 505, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.4235\n",
      "Epoch: 506, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4212\n",
      "Epoch: 507, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4129\n",
      "Epoch: 508, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4114\n",
      "Epoch: 509, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4139\n",
      "Epoch: 510, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.4106\n",
      "Epoch: 511, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4143\n",
      "Epoch: 512, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.4140\n",
      "Epoch: 513, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4069\n",
      "Epoch: 514, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4080\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 515, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4058\n",
      "Epoch: 516, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4031\n",
      "Epoch: 517, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4041\n",
      "Epoch: 518, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.4042\n",
      "Epoch: 519, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.4011\n",
      "Epoch: 520, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.4017\n",
      "Epoch: 521, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3997\n",
      "Epoch: 522, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3996\n",
      "Epoch: 523, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3965\n",
      "Epoch: 524, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3951\n",
      "Epoch: 525, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3998\n",
      "Epoch: 526, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3957\n",
      "Epoch: 527, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3947\n",
      "Epoch: 528, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3967\n",
      "Epoch: 529, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3911\n",
      "Epoch: 530, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3902\n",
      "Epoch: 531, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3965\n",
      "Epoch: 532, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3918\n",
      "Epoch: 533, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.4017\n",
      "Epoch: 534, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.3988\n",
      "Epoch: 535, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.3907\n",
      "Epoch: 536, train_acc: 0.9714, val_acc: 0.9714, train_loss: 0.3913\n",
      "Epoch: 537, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3881\n",
      "Epoch: 538, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.3857\n",
      "Epoch: 539, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.3920\n",
      "Epoch: 540, train_acc: 0.9643, val_acc: 0.9571, train_loss: 0.3874\n",
      "Epoch: 541, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3898\n",
      "Epoch: 542, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3895\n",
      "Epoch: 543, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3860\n",
      "Epoch: 544, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3829\n",
      "Epoch: 545, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3855\n",
      "Epoch: 546, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.3824\n",
      "Epoch: 547, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.3892\n",
      "Epoch: 548, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3908\n",
      "Epoch: 549, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3818\n",
      "Epoch: 550, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3811\n",
      "Epoch: 551, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3793\n",
      "Epoch: 552, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3776\n",
      "Epoch: 553, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3813\n",
      "Epoch: 554, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3788\n",
      "Epoch: 555, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3849\n",
      "Epoch: 556, train_acc: 0.9643, val_acc: 0.9571, train_loss: 0.3810\n",
      "Epoch: 557, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3775\n",
      "Epoch: 558, train_acc: 0.9750, val_acc: 0.9571, train_loss: 0.3749\n",
      "Epoch: 559, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3768\n",
      "Epoch: 560, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3766\n",
      "Epoch: 561, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3776\n",
      "Epoch: 562, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3760\n",
      "Epoch: 563, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.3776\n",
      "Epoch: 564, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3768\n",
      "Epoch: 565, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3692\n",
      "Epoch: 566, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3684\n",
      "Epoch: 567, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3689\n",
      "Epoch: 568, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3703\n",
      "Epoch: 569, train_acc: 0.9714, val_acc: 0.9714, train_loss: 0.3662\n",
      "Epoch: 570, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3679\n",
      "Epoch: 571, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3657\n",
      "Epoch: 572, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3662\n",
      "Epoch: 573, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3679\n",
      "Epoch: 574, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3637\n",
      "Epoch: 575, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3687\n",
      "Epoch: 576, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3690\n",
      "Epoch: 577, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3705\n",
      "Epoch: 578, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3616\n",
      "Epoch: 579, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.3743\n",
      "Epoch: 580, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3774\n",
      "Epoch: 581, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.3699\n",
      "Epoch: 582, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3642\n",
      "Epoch: 583, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3639\n",
      "Epoch: 584, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.3655\n",
      "Epoch: 585, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3620\n",
      "Epoch: 586, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3568\n",
      "Epoch: 587, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.3678\n",
      "Epoch: 588, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.3650\n",
      "Epoch: 589, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3624\n",
      "Epoch: 590, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3594\n",
      "Epoch: 591, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3567\n",
      "Epoch: 592, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3556\n",
      "Epoch: 593, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3612\n",
      "Epoch: 594, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3580\n",
      "Epoch: 595, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3575\n",
      "Epoch: 596, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3575\n",
      "Epoch: 597, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.3545\n",
      "Epoch: 598, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3530\n",
      "Epoch: 599, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3520\n",
      "Epoch: 600, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3515\n",
      "Epoch: 601, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3547\n",
      "Epoch: 602, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3501\n",
      "Epoch: 603, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3543\n",
      "Epoch: 604, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.3534\n",
      "Epoch: 605, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3508\n",
      "Epoch: 606, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3492\n",
      "Epoch: 607, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3492\n",
      "Epoch: 608, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3477\n",
      "Epoch: 609, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3493\n",
      "Epoch: 610, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3489\n",
      "Epoch: 611, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3468\n",
      "Epoch: 612, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3448\n",
      "Epoch: 613, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3492\n",
      "Epoch: 614, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3463\n",
      "Epoch: 615, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3428\n",
      "Epoch: 616, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3431\n",
      "Epoch: 617, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3462\n",
      "Epoch: 618, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3441\n",
      "Epoch: 619, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.3432\n",
      "Epoch: 620, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.3406\n",
      "Epoch: 621, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3427\n",
      "Epoch: 622, train_acc: 0.9643, val_acc: 0.9571, train_loss: 0.3422\n",
      "Epoch: 623, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3393\n",
      "Epoch: 624, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3370\n",
      "Epoch: 625, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.3447\n",
      "Epoch: 626, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3439\n",
      "Epoch: 627, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3359\n",
      "Epoch: 628, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3353\n",
      "Epoch: 629, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.3386\n",
      "Epoch: 630, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.3353\n",
      "Epoch: 631, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.3385\n",
      "Epoch: 632, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3381\n",
      "Epoch: 633, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3326\n",
      "Epoch: 634, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3304\n",
      "Epoch: 635, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3385\n",
      "Epoch: 636, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3364\n",
      "Epoch: 637, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3377\n",
      "Epoch: 638, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3362\n",
      "Epoch: 639, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3325\n",
      "Epoch: 640, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3299\n",
      "Epoch: 641, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3361\n",
      "Epoch: 642, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.3342\n",
      "Epoch: 643, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3316\n",
      "Epoch: 644, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3302\n",
      "Epoch: 645, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3298\n",
      "Epoch: 646, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.3286\n",
      "Epoch: 647, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.3308\n",
      "Epoch: 648, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3281\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 649, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3289\n",
      "Epoch: 650, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3295\n",
      "Epoch: 651, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.3246\n",
      "Epoch: 652, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3233\n",
      "Epoch: 653, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3295\n",
      "Epoch: 654, train_acc: 0.9643, val_acc: 0.9571, train_loss: 0.3260\n",
      "Epoch: 655, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3270\n",
      "Epoch: 656, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.3262\n",
      "Epoch: 657, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3219\n",
      "Epoch: 658, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3199\n",
      "Epoch: 659, train_acc: 0.9643, val_acc: 0.9571, train_loss: 0.3278\n",
      "Epoch: 660, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3266\n",
      "Epoch: 661, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3237\n",
      "Epoch: 662, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3222\n",
      "Epoch: 663, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3214\n",
      "Epoch: 664, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.3185\n",
      "Epoch: 665, train_acc: 0.9643, val_acc: 0.9714, train_loss: 0.3272\n",
      "Epoch: 666, train_acc: 0.9643, val_acc: 0.9571, train_loss: 0.3279\n",
      "Epoch: 667, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3196\n",
      "Epoch: 668, train_acc: 0.9714, val_acc: 0.9714, train_loss: 0.3212\n",
      "Epoch: 669, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3195\n",
      "Epoch: 670, train_acc: 0.9661, val_acc: 0.9429, train_loss: 0.3237\n",
      "Epoch: 671, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3188\n",
      "Epoch: 672, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3217\n",
      "Epoch: 673, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.3209\n",
      "Epoch: 674, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3162\n",
      "Epoch: 675, train_acc: 0.9732, val_acc: 0.9429, train_loss: 0.3144\n",
      "Epoch: 676, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3175\n",
      "Epoch: 677, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3135\n",
      "Epoch: 678, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3196\n",
      "Epoch: 679, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3220\n",
      "Epoch: 680, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3106\n",
      "Epoch: 681, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3116\n",
      "Epoch: 682, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3129\n",
      "Epoch: 683, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.3093\n",
      "Epoch: 684, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.3165\n",
      "Epoch: 685, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.3182\n",
      "Epoch: 686, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3123\n",
      "Epoch: 687, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3093\n",
      "Epoch: 688, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.3098\n",
      "Epoch: 689, train_acc: 0.9714, val_acc: 0.9714, train_loss: 0.3081\n",
      "Epoch: 690, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3066\n",
      "Epoch: 691, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.3077\n",
      "Epoch: 692, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3077\n",
      "Epoch: 693, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.3055\n",
      "Epoch: 694, train_acc: 0.9714, val_acc: 0.9714, train_loss: 0.3068\n",
      "Epoch: 695, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3044\n",
      "Epoch: 696, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3101\n",
      "Epoch: 697, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.3061\n",
      "Epoch: 698, train_acc: 0.9696, val_acc: 0.9429, train_loss: 0.3093\n",
      "Epoch: 699, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3100\n",
      "Epoch: 700, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.3045\n",
      "Epoch: 701, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3025\n",
      "Epoch: 702, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3024\n",
      "Epoch: 703, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3028\n",
      "Epoch: 704, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3033\n",
      "Epoch: 705, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2995\n",
      "Epoch: 706, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3038\n",
      "Epoch: 707, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3027\n",
      "Epoch: 708, train_acc: 0.9714, val_acc: 0.9714, train_loss: 0.3026\n",
      "Epoch: 709, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.3057\n",
      "Epoch: 710, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.2996\n",
      "Epoch: 711, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.3021\n",
      "Epoch: 712, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.3022\n",
      "Epoch: 713, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.2968\n",
      "Epoch: 714, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3027\n",
      "Epoch: 715, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2971\n",
      "Epoch: 716, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.2984\n",
      "Epoch: 717, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.2995\n",
      "Epoch: 718, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2958\n",
      "Epoch: 719, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2972\n",
      "Epoch: 720, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2970\n",
      "Epoch: 721, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.2924\n",
      "Epoch: 722, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.2960\n",
      "Epoch: 723, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2923\n",
      "Epoch: 724, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.3034\n",
      "Epoch: 725, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3053\n",
      "Epoch: 726, train_acc: 0.9625, val_acc: 0.9714, train_loss: 0.2953\n",
      "Epoch: 727, train_acc: 0.9625, val_acc: 0.9571, train_loss: 0.3044\n",
      "Epoch: 728, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.3059\n",
      "Epoch: 729, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2913\n",
      "Epoch: 730, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.3018\n",
      "Epoch: 731, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.3045\n",
      "Epoch: 732, train_acc: 0.9607, val_acc: 0.9571, train_loss: 0.2925\n",
      "Epoch: 733, train_acc: 0.9607, val_acc: 0.9571, train_loss: 0.2958\n",
      "Epoch: 734, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2961\n",
      "Epoch: 735, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2916\n",
      "Epoch: 736, train_acc: 0.9732, val_acc: 0.9429, train_loss: 0.2937\n",
      "Epoch: 737, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2911\n",
      "Epoch: 738, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.2869\n",
      "Epoch: 739, train_acc: 0.9661, val_acc: 0.9714, train_loss: 0.2951\n",
      "Epoch: 740, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.2917\n",
      "Epoch: 741, train_acc: 0.9714, val_acc: 0.9714, train_loss: 0.2874\n",
      "Epoch: 742, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2895\n",
      "Epoch: 743, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2885\n",
      "Epoch: 744, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2858\n",
      "Epoch: 745, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.2877\n",
      "Epoch: 746, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2847\n",
      "Epoch: 747, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2872\n",
      "Epoch: 748, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2854\n",
      "Epoch: 749, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2912\n",
      "Epoch: 750, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2898\n",
      "Epoch: 751, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2830\n",
      "Epoch: 752, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2804\n",
      "Epoch: 753, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2827\n",
      "Epoch: 754, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2808\n",
      "Epoch: 755, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2819\n",
      "Epoch: 756, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2816\n",
      "Epoch: 757, train_acc: 0.9714, val_acc: 0.9714, train_loss: 0.2835\n",
      "Epoch: 758, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2793\n",
      "Epoch: 759, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2831\n",
      "Epoch: 760, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2859\n",
      "Epoch: 761, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2768\n",
      "Epoch: 762, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.2788\n",
      "Epoch: 763, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2799\n",
      "Epoch: 764, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2797\n",
      "Epoch: 765, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2755\n",
      "Epoch: 766, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.2744\n",
      "Epoch: 767, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2746\n",
      "Epoch: 768, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2764\n",
      "Epoch: 769, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.2745\n",
      "Epoch: 770, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2737\n",
      "Epoch: 771, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.2710\n",
      "Epoch: 772, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.2775\n",
      "Epoch: 773, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2758\n",
      "Epoch: 774, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2744\n",
      "Epoch: 775, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2777\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 776, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2705\n",
      "Epoch: 777, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.2739\n",
      "Epoch: 778, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2716\n",
      "Epoch: 779, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2710\n",
      "Epoch: 780, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2701\n",
      "Epoch: 781, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2698\n",
      "Epoch: 782, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2691\n",
      "Epoch: 783, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2655\n",
      "Epoch: 784, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2746\n",
      "Epoch: 785, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2738\n",
      "Epoch: 786, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2660\n",
      "Epoch: 787, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.2647\n",
      "Epoch: 788, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2684\n",
      "Epoch: 789, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2671\n",
      "Epoch: 790, train_acc: 0.9696, val_acc: 0.9429, train_loss: 0.2680\n",
      "Epoch: 791, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.2657\n",
      "Epoch: 792, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2645\n",
      "Epoch: 793, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2624\n",
      "Epoch: 794, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2666\n",
      "Epoch: 795, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2651\n",
      "Epoch: 796, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2669\n",
      "Epoch: 797, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2655\n",
      "Epoch: 798, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2614\n",
      "Epoch: 799, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2603\n",
      "Epoch: 800, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2632\n",
      "Epoch: 801, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.2629\n",
      "Epoch: 802, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2603\n",
      "Epoch: 803, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2585\n",
      "Epoch: 804, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2634\n",
      "Epoch: 805, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2616\n",
      "Epoch: 806, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2602\n",
      "Epoch: 807, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2600\n",
      "Epoch: 808, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2593\n",
      "Epoch: 809, train_acc: 0.9679, val_acc: 0.9429, train_loss: 0.2577\n",
      "Epoch: 810, train_acc: 0.9661, val_acc: 0.9429, train_loss: 0.2584\n",
      "Epoch: 811, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2571\n",
      "Epoch: 812, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2589\n",
      "Epoch: 813, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2576\n",
      "Epoch: 814, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2571\n",
      "Epoch: 815, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2556\n",
      "Epoch: 816, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2583\n",
      "Epoch: 817, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2578\n",
      "Epoch: 818, train_acc: 0.9679, val_acc: 0.9429, train_loss: 0.2552\n",
      "Epoch: 819, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2541\n",
      "Epoch: 820, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2560\n",
      "Epoch: 821, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2537\n",
      "Epoch: 822, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2544\n",
      "Epoch: 823, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2541\n",
      "Epoch: 824, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2518\n",
      "Epoch: 825, train_acc: 0.9696, val_acc: 0.9429, train_loss: 0.2510\n",
      "Epoch: 826, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2534\n",
      "Epoch: 827, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2509\n",
      "Epoch: 828, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2562\n",
      "Epoch: 829, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2552\n",
      "Epoch: 830, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.2481\n",
      "Epoch: 831, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2490\n",
      "Epoch: 832, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2516\n",
      "Epoch: 833, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2502\n",
      "Epoch: 834, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.2538\n",
      "Epoch: 835, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2544\n",
      "Epoch: 836, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2527\n",
      "Epoch: 837, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2516\n",
      "Epoch: 838, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.2500\n",
      "Epoch: 839, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2480\n",
      "Epoch: 840, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2525\n",
      "Epoch: 841, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2524\n",
      "Epoch: 842, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2503\n",
      "Epoch: 843, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2475\n",
      "Epoch: 844, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2512\n",
      "Epoch: 845, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2480\n",
      "Epoch: 846, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2462\n",
      "Epoch: 847, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2451\n",
      "Epoch: 848, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2471\n",
      "Epoch: 849, train_acc: 0.9696, val_acc: 0.9429, train_loss: 0.2450\n",
      "Epoch: 850, train_acc: 0.9696, val_acc: 0.9429, train_loss: 0.2482\n",
      "Epoch: 851, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2477\n",
      "Epoch: 852, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2457\n",
      "Epoch: 853, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2434\n",
      "Epoch: 854, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2427\n",
      "Epoch: 855, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.2417\n",
      "Epoch: 856, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2487\n",
      "Epoch: 857, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2463\n",
      "Epoch: 858, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2407\n",
      "Epoch: 859, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2393\n",
      "Epoch: 860, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2445\n",
      "Epoch: 861, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2444\n",
      "Epoch: 862, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2401\n",
      "Epoch: 863, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.2390\n",
      "Epoch: 864, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2417\n",
      "Epoch: 865, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2401\n",
      "Epoch: 866, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2390\n",
      "Epoch: 867, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2392\n",
      "Epoch: 868, train_acc: 0.9696, val_acc: 0.9714, train_loss: 0.2387\n",
      "Epoch: 869, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2377\n",
      "Epoch: 870, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2386\n",
      "Epoch: 871, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2380\n",
      "Epoch: 872, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2383\n",
      "Epoch: 873, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2374\n",
      "Epoch: 874, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2357\n",
      "Epoch: 875, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2347\n",
      "Epoch: 876, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2404\n",
      "Epoch: 877, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2404\n",
      "Epoch: 878, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2322\n",
      "Epoch: 879, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2360\n",
      "Epoch: 880, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2339\n",
      "Epoch: 881, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2355\n",
      "Epoch: 882, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2382\n",
      "Epoch: 883, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2321\n",
      "Epoch: 884, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2396\n",
      "Epoch: 885, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2392\n",
      "Epoch: 886, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2298\n",
      "Epoch: 887, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2455\n",
      "Epoch: 888, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2465\n",
      "Epoch: 889, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2355\n",
      "Epoch: 890, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2347\n",
      "Epoch: 891, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.2332\n",
      "Epoch: 892, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2358\n",
      "Epoch: 893, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2335\n",
      "Epoch: 894, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2346\n",
      "Epoch: 895, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2376\n",
      "Epoch: 896, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2359\n",
      "Epoch: 897, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2296\n",
      "Epoch: 898, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2370\n",
      "Epoch: 899, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2383\n",
      "Epoch: 900, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2341\n",
      "Epoch: 901, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2311\n",
      "Epoch: 902, train_acc: 0.9679, val_acc: 0.9429, train_loss: 0.2286\n",
      "Epoch: 903, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2322\n",
      "Epoch: 904, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2304\n",
      "Epoch: 905, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2267\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 906, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2343\n",
      "Epoch: 907, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2335\n",
      "Epoch: 908, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2333\n",
      "Epoch: 909, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2329\n",
      "Epoch: 910, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2285\n",
      "Epoch: 911, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2286\n",
      "Epoch: 912, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2260\n",
      "Epoch: 913, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2262\n",
      "Epoch: 914, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2276\n",
      "Epoch: 915, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2231\n",
      "Epoch: 916, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2321\n",
      "Epoch: 917, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2309\n",
      "Epoch: 918, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2252\n",
      "Epoch: 919, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2234\n",
      "Epoch: 920, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2290\n",
      "Epoch: 921, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2267\n",
      "Epoch: 922, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2302\n",
      "Epoch: 923, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2315\n",
      "Epoch: 924, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2306\n",
      "Epoch: 925, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2271\n",
      "Epoch: 926, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2216\n",
      "Epoch: 927, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2207\n",
      "Epoch: 928, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.2245\n",
      "Epoch: 929, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2295\n",
      "Epoch: 930, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2194\n",
      "Epoch: 931, train_acc: 0.9679, val_acc: 0.9571, train_loss: 0.2224\n",
      "Epoch: 932, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2237\n",
      "Epoch: 933, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2190\n",
      "Epoch: 934, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2201\n",
      "Epoch: 935, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.2195\n",
      "Epoch: 936, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2196\n",
      "Epoch: 937, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2176\n",
      "Epoch: 938, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2200\n",
      "Epoch: 939, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2178\n",
      "Epoch: 940, train_acc: 0.9679, val_acc: 0.9429, train_loss: 0.2218\n",
      "Epoch: 941, train_acc: 0.9696, val_acc: 0.9571, train_loss: 0.2223\n",
      "Epoch: 942, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2214\n",
      "Epoch: 943, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2180\n",
      "Epoch: 944, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2186\n",
      "Epoch: 945, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2208\n",
      "Epoch: 946, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.2164\n",
      "Epoch: 947, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2257\n",
      "Epoch: 948, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2222\n",
      "Epoch: 949, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2141\n",
      "Epoch: 950, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2182\n",
      "Epoch: 951, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2145\n",
      "Epoch: 952, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2182\n",
      "Epoch: 953, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2219\n",
      "Epoch: 954, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2171\n",
      "Epoch: 955, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2125\n",
      "Epoch: 956, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2141\n",
      "Epoch: 957, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2125\n",
      "Epoch: 958, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2139\n",
      "Epoch: 959, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2127\n",
      "Epoch: 960, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2114\n",
      "Epoch: 961, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2141\n",
      "Epoch: 962, train_acc: 0.9732, val_acc: 0.9714, train_loss: 0.2107\n",
      "Epoch: 963, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2191\n",
      "Epoch: 964, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2191\n",
      "Epoch: 965, train_acc: 0.9696, val_acc: 0.9429, train_loss: 0.2109\n",
      "Epoch: 966, train_acc: 0.9696, val_acc: 0.9429, train_loss: 0.2198\n",
      "Epoch: 967, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2208\n",
      "Epoch: 968, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2136\n",
      "Epoch: 969, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2184\n",
      "Epoch: 970, train_acc: 0.9732, val_acc: 0.9429, train_loss: 0.2177\n",
      "Epoch: 971, train_acc: 0.9679, val_acc: 0.9714, train_loss: 0.2130\n",
      "Epoch: 972, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2167\n",
      "Epoch: 973, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2092\n",
      "Epoch: 974, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2089\n",
      "Epoch: 975, train_acc: 0.9679, val_acc: 0.9857, train_loss: 0.2072\n",
      "Val improved\n",
      "Epoch: 976, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2166\n",
      "Epoch: 977, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2118\n",
      "Epoch: 978, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2118\n",
      "Epoch: 979, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2131\n",
      "Epoch: 980, train_acc: 0.9679, val_acc: 0.9857, train_loss: 0.2105\n",
      "Epoch: 981, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2133\n",
      "Epoch: 982, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2069\n",
      "Epoch: 983, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2082\n",
      "Epoch: 984, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.2040\n",
      "Epoch: 985, train_acc: 0.9661, val_acc: 0.9571, train_loss: 0.2140\n",
      "Epoch: 986, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2138\n",
      "Epoch: 987, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2095\n",
      "Epoch: 988, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2102\n",
      "Epoch: 989, train_acc: 0.9714, val_acc: 0.9714, train_loss: 0.2085\n",
      "Epoch: 990, train_acc: 0.9714, val_acc: 0.9571, train_loss: 0.2063\n",
      "Epoch: 991, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2098\n",
      "Epoch: 992, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2076\n",
      "Epoch: 993, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2068\n",
      "Epoch: 994, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2069\n",
      "Epoch: 995, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2047\n",
      "Epoch: 996, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2012\n",
      "Epoch: 997, train_acc: 0.9732, val_acc: 0.9571, train_loss: 0.2052\n",
      "Epoch: 998, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2053\n",
      "Epoch: 999, train_acc: 0.9714, val_acc: 0.9429, train_loss: 0.2051\n",
      "975\n",
      "final train_acc:0.9678571428571429, val_acc: 0.9857142857142858, test_acc: 1.0\n"
     ]
    }
   ],
   "source": [
    "_dataset = config.model.dataset\n",
    "_explainer = config.model.paper\n",
    "\n",
    "if _dataset[:3] == \"syn\":\n",
    "    train_node(_dataset, _explainer, config.model)\n",
    "elif _dataset == \"ba2\" or _dataset == \"mutag\":\n",
    "    train_graph(_dataset, _explainer, config.model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}