{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pathlib\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "base_path = pathlib.Path(os.getcwd())\n",
    "base_path = str(base_path.parent)\n",
    "sys.path = [base_path] + sys.path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "import random as python_random\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import tensorflow_datasets as tfds\n",
    "from sklearn import metrics\n",
    "from sklearn.model_selection import train_test_split\n",
    "from tensorflow import keras\n",
    "from tensorflow.keras import backend as K\n",
    "from tensorflow.keras import layers\n",
    "from tensorflow.keras.utils import model_to_dot\n",
    "from IPython.display import SVG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configure seaborn's global plotting theme.\n",
    "sns.set(\n",
    "    context=\"notebook\",\n",
    "    style=\"darkgrid\",\n",
    "    palette=\"deep\",\n",
    "    font=\"sans-serif\",\n",
    "    font_scale=1.0,\n",
    "    color_codes=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.makedirs(\"./img/\", exist_ok=True)\n",
    "os.makedirs(\"./score/\", exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def set_experimental_environment(seed=6902):\n",
    "    \"\"\"Reset the Keras session and seed every random number generator used.\n",
    "\n",
    "    Args:\n",
    "        seed: Value applied to NumPy, Python's ``random`` module and\n",
    "            TensorFlow alike.\n",
    "    \"\"\"\n",
    "    K.clear_session()\n",
    "\n",
    "    # Seed, in this order: NumPy's global generator, core Python's `random`\n",
    "    # module, and the TensorFlow global seed. For how TensorFlow combines the\n",
    "    # global seed with operation seeds, see:\n",
    "    # https://www.tensorflow.org/api_docs/python/tf/random/set_seed\n",
    "    seeders = (np.random.seed, python_random.seed, tf.random.set_seed)\n",
    "    for apply_seed in seeders:\n",
    "        apply_seed(seed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Driver Version: b'419.17'\n",
      "Device 0 : b'GeForce GTX 1070 Ti'\n"
     ]
    }
   ],
   "source": [
    "from pynvml import *\n",
    "\n",
    "# Print the NVIDIA driver version and the name of every visible GPU.\n",
    "# Any NVML failure (including a failed initialisation) is reported as text\n",
    "# instead of aborting the notebook.\n",
    "try:\n",
    "    nvmlInit()\n",
    "    try:\n",
    "        print(\"Driver Version:\", nvmlSystemGetDriverVersion())\n",
    "        deviceCount = nvmlDeviceGetCount()\n",
    "        for i in range(deviceCount):\n",
    "            handle = nvmlDeviceGetHandleByIndex(i)\n",
    "            print(\"Device\", i, \":\", nvmlDeviceGetName(handle))\n",
    "    finally:\n",
    "        # Always release NVML once it has been initialised, even when one of\n",
    "        # the queries above raises.\n",
    "        nvmlShutdown()\n",
    "except NVMLError as error:\n",
    "    print(error)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "python_version: 3.6.10.final.0 (64 bit)\n",
      "cpuinfo_version: [7, 0, 0]\n",
      "cpuinfo_version_string: 7.0.0\n",
      "arch: X86_64\n",
      "bits: 64\n",
      "count: 12\n",
      "arch_string_raw: AMD64\n",
      "vendor_id_raw: GenuineIntel\n",
      "brand_raw: Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz\n",
      "hz_advertised_friendly: 3.2000 GHz\n",
      "hz_actual_friendly: 3.1920 GHz\n",
      "hz_advertised: [3200000000, 0]\n",
      "hz_actual: [3192000000, 0]\n",
      "l2_cache_size: 1572864\n",
      "stepping: 10\n",
      "model: 158\n",
      "family: 6\n",
      "l3_cache_size: 12582912\n",
      "flags: ['3dnow', '3dnowprefetch', 'abm', 'acpi', 'adx', 'aes', 'apic', 'avx', 'avx2', 'bmi1', 'bmi2', 'clflush', 'clflushopt', 'cmov', 'cx16', 'cx8', 'de', 'dtes64', 'dts', 'erms', 'est', 'f16c', 'fma', 'fpu', 'fxsr', 'hle', 'ht', 'hypervisor', 'ia64', 'invpcid', 'lahf_lm', 'mca', 'mce', 'mmx', 'movbe', 'mpx', 'msr', 'mtrr', 'osxsave', 'pae', 'pat', 'pbe', 'pcid', 'pclmulqdq', 'pdcm', 'pge', 'pni', 'popcnt', 'pse', 'pse36', 'rdrnd', 'rdseed', 'rtm', 'sep', 'serial', 'smap', 'smep', 'ss', 'sse', 'sse2', 'sse4_1', 'sse4_2', 'ssse3', 'tm', 'tm2', 'tsc', 'vme', 'x2apic', 'xsave', 'xtpr']\n",
      "l2_cache_line_size: 256\n",
      "l2_cache_associativity: 6\n"
     ]
    }
   ],
   "source": [
    "from cpuinfo import get_cpu_info\n",
    "\n",
    "# Print every field returned by get_cpu_info() as \"field: value\".\n",
    "for key, value in get_cpu_info().items():\n",
    "    print(key, value, sep=\": \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "memory: 15.92GB\n"
     ]
    }
   ],
   "source": [
    "import psutil \n",
    "\n",
    "# Report the total memory seen by psutil, converted from bytes to GiB.\n",
    "mem = psutil.virtual_memory()\n",
    "total_gib = mem.total / 1024**3\n",
    "print(\"memory: {0:.2f}GB\".format(total_gib))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "BAYES_MODELS = [\n",
    "    \"MNIST-CNN\",\n",
    "]\n",
    "\n",
    "LAST_ACTIVATIONS = [\n",
    "    \"sigmoid\",\n",
    "    \"softmax\",\n",
    "]\n",
    "\n",
    "PREDICTION_MODES = [\n",
    "    \"Normal-mode\",\n",
    "    \"Linear-mode\",\n",
    "    \"Independent-mode\",\n",
    "    \"Upper-mode\",\n",
    "    \"MC-mode\",\n",
    "]\n",
    "\n",
    "DATASETS = [\n",
    "    \"MNIST\",\n",
    "    \"Fashion\",\n",
    "    \"Kuzushiji\",\n",
    "    \"Kannada\",\n",
    "    \"EMNIST-MNIST\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Args:\n",
    "    \"\"\"Experiment settings, kept as plain class attributes.\"\"\"\n",
    "\n",
    "    # Dataset and architecture, selected by position in the module-level lists.\n",
    "    train_domain = DATASETS[1]\n",
    "    bayes_model = BAYES_MODELS[0]\n",
    "\n",
    "    # Training settings.\n",
    "    optimizer = \"Adam\"\n",
    "    train_batch_size = 128\n",
    "    max_epochs = 100\n",
    "    patience = 10  # presumably the early-stopping patience -- TODO confirm\n",
    "\n",
    "    # Evaluation settings.\n",
    "    test_batch_size = 2048\n",
    "    num_mc = 2000  # presumably the number of Monte Carlo samples -- TODO confirm\n",
    "    max_seed = 30  # presumably the number of seeds to run -- TODO confirm\n",
    "\n",
    "    # Candidate rho values (their use is not visible here; presumably a search grid).\n",
    "    rhos = [0.0, 1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def kuzushiji_load_data():\n",
    "    \"\"\"Load the \"kmnist\" dataset through TensorFlow Datasets.\n",
    "\n",
    "    Returns:\n",
    "        ``(x_train, y_train), (x_test, y_test)`` as uint8 NumPy arrays, with\n",
    "        the images reshaped to ``(N, 28, 28)``.\n",
    "    \"\"\"\n",
    "\n",
    "    def _to_arrays(split):\n",
    "        # Reshape the images to (N, 28, 28) and store everything as uint8.\n",
    "        images = split[\"image\"].reshape((-1, 28, 28)).astype(\"uint8\")\n",
    "        labels = split[\"label\"].astype(\"uint8\")\n",
    "        return images, labels\n",
    "\n",
    "    # batch_size=-1 asks tfds for each split as one full batch.\n",
    "    train_split, test_split = tfds.as_numpy(\n",
    "        tfds.load(\n",
    "            \"kmnist\", split=[\"train\", \"test\"], shuffle_files=False, batch_size=-1\n",
    "        )\n",
    "    )\n",
    "    return _to_arrays(train_split), _to_arrays(test_split)\n",
    "\n",
    "\n",
    "def kannada_load_data():\n",
    "    \"\"\"Fetch the Kannada-MNIST arrays via ``tf.keras.utils.get_file``.\n",
    "\n",
    "    Four ``.npz`` archives (train/test labels and images) are downloaded and\n",
    "    the ``\"arr_0\"`` entry of each one is read.\n",
    "\n",
    "    Returns:\n",
    "        ``(x_train, y_train), (x_test, y_test)`` as NumPy arrays.\n",
    "    \"\"\"\n",
    "    base_url = (\n",
    "        \"https://github.com/vinayprabhu/Kannada_MNIST/raw/master/\"\n",
    "        \"data/output_tensors/MNIST_format/\"\n",
    "    )\n",
    "\n",
    "    def _fetch(file_name):\n",
    "        # Download one archive and return its single stored array.\n",
    "        path = tf.keras.utils.get_file(file_name, base_url + file_name)\n",
    "        # np.load keeps an .npz archive open until it is closed; the context\n",
    "        # manager releases the file handle once the array is in memory.\n",
    "        with np.load(path) as archive:\n",
    "            return archive[\"arr_0\"]\n",
    "\n",
    "    y_train = _fetch(\"y_kannada_MNIST_train.npz\")\n",
    "    x_train = _fetch(\"X_kannada_MNIST_train.npz\")\n",
    "    y_test = _fetch(\"y_kannada_MNIST_test.npz\")\n",
    "    x_test = _fetch(\"X_kannada_MNIST_test.npz\")\n",
    "\n",
    "    return (x_train, y_train), (x_test, y_test)\n",
    "\n",
    "\n",
    "def emnist_mnist_load_data():\n",
    "    \"\"\"Load the \"emnist/mnist\" dataset through TensorFlow Datasets.\n",
    "\n",
    "    Every image is transposed (rows and columns swapped) after loading,\n",
    "    presumably to match the orientation of the other datasets -- TODO confirm.\n",
    "\n",
    "    Returns:\n",
    "        ``(x_train, y_train), (x_test, y_test)`` as uint8 NumPy arrays, with\n",
    "        the images shaped ``(N, 28, 28)``.\n",
    "    \"\"\"\n",
    "    ds = tfds.load(\n",
    "        \"emnist/mnist\", split=[\"train\", \"test\"], shuffle_files=False, batch_size=-1\n",
    "    )\n",
    "    train_split, test_split = tfds.as_numpy(ds)\n",
    "\n",
    "    def _to_arrays(split):\n",
    "        # Convert one split into (images, labels) with transposed images.\n",
    "        images = split[\"image\"].reshape((-1, 28, 28)).astype(\"uint8\")\n",
    "        labels = split[\"label\"].astype(\"uint8\")\n",
    "        # Swap the two image axes for the whole batch at once instead of\n",
    "        # looping over images in Python; ascontiguousarray materialises a\n",
    "        # C-ordered copy so callers get a regular array, not a strided view.\n",
    "        images = np.ascontiguousarray(np.swapaxes(images, 1, 2))\n",
    "        return images, labels\n",
    "\n",
    "    return _to_arrays(train_split), _to_arrays(test_split)\n",
    "\n",
    "\n",
    "def load_dataset(dataset, val_size=1.0 / 6.0, random_state=None):\n",
    "    \"\"\"Load one of the supported datasets and prepare it for training.\n",
    "\n",
    "    Images are scaled to [0, 1] as float32 and given a trailing channel axis;\n",
    "    labels are one-hot encoded over ``num_classes`` classes.\n",
    "\n",
    "    Args:\n",
    "        dataset: One of the names in ``DATASETS``.\n",
    "        val_size: Passed to ``train_test_split`` as ``test_size``; the held-out\n",
    "            part, stratified by label, becomes the validation set. If it is\n",
    "            not positive, no split is made and the validation set is the\n",
    "            training set itself.\n",
    "        random_state: Optional seed forwarded to ``train_test_split``. With the\n",
    "            default ``None`` the split draws from NumPy's global random state.\n",
    "\n",
    "    Returns:\n",
    "        ``(x_train, y_train), (x_val, y_val), (x_test, y_test), num_classes``.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``dataset`` is not a known dataset name.\n",
    "    \"\"\"\n",
    "    loaders = {\n",
    "        DATASETS[0]: keras.datasets.mnist.load_data,\n",
    "        DATASETS[1]: keras.datasets.fashion_mnist.load_data,\n",
    "        DATASETS[2]: kuzushiji_load_data,\n",
    "        DATASETS[3]: kannada_load_data,\n",
    "        DATASETS[4]: emnist_mnist_load_data,\n",
    "    }\n",
    "    try:\n",
    "        load_data = loaders[dataset]\n",
    "    except (KeyError, TypeError):\n",
    "        # TypeError covers unhashable arguments, which cannot be a dataset name.\n",
    "        raise ValueError(\n",
    "            \"Unknown dataset {!r}; expected one of {}\".format(dataset, DATASETS)\n",
    "        ) from None\n",
    "\n",
    "    # number of target classes used for the one-hot encoding below\n",
    "    num_classes = 10\n",
    "\n",
    "    # the data, split between train and test sets\n",
    "    (x_train, y_train), (x_test, y_test) = load_data()\n",
    "\n",
    "    # Scale images to the [0, 1] range and append a channel axis, so that\n",
    "    # images end up with shape (28, 28, 1).\n",
    "    x_train = np.expand_dims(x_train.astype(\"float32\") / 255.0, -1)\n",
    "    x_test = np.expand_dims(x_test.astype(\"float32\") / 255.0, -1)\n",
    "\n",
    "    if val_size > 0.0:\n",
    "        x_train, x_val, y_train, y_val = train_test_split(\n",
    "            x_train,\n",
    "            y_train,\n",
    "            test_size=val_size,\n",
    "            stratify=y_train,\n",
    "            random_state=random_state,\n",
    "        )\n",
    "    else:\n",
    "        # No hold-out requested: validate on the training data itself.\n",
    "        x_val, y_val = x_train, y_train\n",
    "\n",
    "    # convert class vectors to binary class matrices\n",
    "    y_train = keras.utils.to_categorical(y_train, num_classes)\n",
    "    y_val = keras.utils.to_categorical(y_val, num_classes)\n",
    "    y_test = keras.utils.to_categorical(y_test, num_classes)\n",
    "\n",
    "    return (x_train, y_train), (x_val, y_val), (x_test, y_test), num_classes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_softmax_entropy(prob):\n",
    "    entropy = np.sum(-prob * np.log(np.maximum(prob, 1.0e-7)), axis=-1)\n",
    "    return entropy\n",
    "\n",
    "\n",
    "def calc_sigmoid_entropy(prob):\n",
    "    entropy = np.sum(\n",
    "        -prob * np.log(np.maximum(prob, 1.0e-7))\n",
    "        - (1.0 - prob) * np.log(np.maximum(1.0 - prob, 1.0e-7)),\n",
    "        axis=-1,\n",
    "    )\n",
    "    return entropy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_last_bayes_model(input_shape, output_shape, last_activation):\n",
    "    \"\"\"Build a small CNN: two conv layers, max-pooling, dropout and a dense head.\n",
    "\n",
    "    The layout follows\n",
    "    https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py\n",
    "\n",
    "    Args:\n",
    "        input_shape: Shape of one input sample, passed to ``keras.Input``.\n",
    "        output_shape: Number of units in the final dense layer.\n",
    "        last_activation: Activation of the final dense layer.\n",
    "\n",
    "    Returns:\n",
    "        The ``keras.Model`` mapping the input to the final dense layer; it is\n",
    "        not compiled here.\n",
    "    \"\"\"\n",
    "    inputs = keras.Input(input_shape)\n",
    "\n",
    "    # Convolutional part.\n",
    "    net = layers.Conv2D(32, kernel_size=(3, 3), activation=\"relu\")(inputs)\n",
    "    net = layers.Conv2D(64, (3, 3), activation=\"relu\")(net)\n",
    "    net = layers.MaxPooling2D(pool_size=(2, 2))(net)\n",
    "    net = layers.Dropout(0.25)(net)\n",
    "\n",
    "    # Fully connected part.\n",
    "    net = layers.Flatten()(net)\n",
    "    net = layers.Dense(128, activation=\"relu\")(net)\n",
    "    net = layers.Dropout(0.5)(net)\n",
    "    outputs = layers.Dense(output_shape, activation=last_activation)(net)\n",
    "\n",
    "    return keras.Model(inputs=inputs, outputs=outputs)\n",
    "\n",
    "\n",
    "def create_bayes_model(bayes_model_name, input_shape, output_shape, last_activation):\n",
    "    \"\"\"Build the network selected by ``bayes_model_name``.\n",
    "\n",
    "    Args:\n",
    "        bayes_model_name: Model name; only ``BAYES_MODELS[0]`` is supported.\n",
    "        input_shape: Forwarded to ``create_last_bayes_model``.\n",
    "        output_shape: Forwarded to ``create_last_bayes_model``.\n",
    "        last_activation: Forwarded to ``create_last_bayes_model``.\n",
    "\n",
    "    Returns:\n",
    "        The model built by ``create_last_bayes_model``.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``bayes_model_name`` is not a supported model name.\n",
    "    \"\"\"\n",
    "    if bayes_model_name != BAYES_MODELS[0]:\n",
    "        # Name the rejected value so a misconfigured run is easy to diagnose.\n",
    "        raise ValueError(\n",
    "            \"Unknown bayes model {!r}; expected one of {}\".format(\n",
    "                bayes_model_name, BAYES_MODELS\n",
    "            )\n",
    "        )\n",
    "\n",
    "    return create_last_bayes_model(input_shape, output_shape, last_activation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instantiate the experiment settings declared on the Args class.\n",
    "args = Args()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 6s - loss: 0.1204 - accuracy: 0.9545 - val_loss: 0.0635 - val_accuracy: 0.9751\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0718 - accuracy: 0.9728 - val_loss: 0.0535 - val_accuracy: 0.9791\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0630 - accuracy: 0.9760 - val_loss: 0.0507 - val_accuracy: 0.9800\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0571 - accuracy: 0.9780 - val_loss: 0.0459 - val_accuracy: 0.9822\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0534 - accuracy: 0.9794 - val_loss: 0.0445 - val_accuracy: 0.9825\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0487 - accuracy: 0.9811 - val_loss: 0.0437 - val_accuracy: 0.9823\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0463 - accuracy: 0.9819 - val_loss: 0.0410 - val_accuracy: 0.9837\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0438 - accuracy: 0.9830 - val_loss: 0.0411 - val_accuracy: 0.9845\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0409 - accuracy: 0.9839 - val_loss: 0.0399 - val_accuracy: 0.9844\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0390 - accuracy: 0.9847 - val_loss: 0.0399 - val_accuracy: 0.9848\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0379 - accuracy: 0.9850 - val_loss: 0.0380 - val_accuracy: 0.9855\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0355 - accuracy: 0.9859 - val_loss: 0.0390 - val_accuracy: 0.9849\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0339 - accuracy: 0.9866 - val_loss: 0.0379 - val_accuracy: 0.9856\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0329 - accuracy: 0.9869 - val_loss: 0.0387 - val_accuracy: 0.9855\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0315 - accuracy: 0.9876 - val_loss: 0.0394 - val_accuracy: 0.9855\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0302 - accuracy: 0.9880 - val_loss: 0.0399 - val_accuracy: 0.9860\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0291 - accuracy: 0.9883 - val_loss: 0.0382 - val_accuracy: 0.9859\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0280 - accuracy: 0.9889 - val_loss: 0.0404 - val_accuracy: 0.9858\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0273 - accuracy: 0.9892 - val_loss: 0.0405 - val_accuracy: 0.9855\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0260 - accuracy: 0.9897 - val_loss: 0.0404 - val_accuracy: 0.9860\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0255 - accuracy: 0.9897 - val_loss: 0.0400 - val_accuracy: 0.9860\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0245 - accuracy: 0.9901 - val_loss: 0.0412 - val_accuracy: 0.9860\n",
      "Epoch 23/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0242 - accuracy: 0.9903 - val_loss: 0.0423 - val_accuracy: 0.9860\n",
      "Epoch 00023: early stopping\n",
      "rho: 0.0, ll: 1.3138298988342285\n",
      "rho: 1e-05, ll: 1.3137892484664917\n",
      "rho: 5e-05, ll: 1.3136119842529297\n",
      "rho: 0.0001, ll: 1.3133584260940552\n",
      "rho: 0.0005, ll: 1.310228705406189\n",
      "rho: 0.001, ll: 1.30418062210083\n",
      "rho: 0.005, ll: 1.220582365989685\n",
      "rho: 0.01, ll: 1.0942329168319702\n",
      "rho: 0.05, ll: 0.36691126227378845\n",
      "rho: 0.1, ll: 0.0011684710625559092\n",
      "best_rho: 0.0, best_ll: 1.3138298988342285\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1185 - accuracy: 0.9552 - val_loss: 0.0658 - val_accuracy: 0.9744\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0722 - accuracy: 0.9724 - val_loss: 0.0552 - val_accuracy: 0.9786\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0617 - accuracy: 0.9762 - val_loss: 0.0493 - val_accuracy: 0.9803\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0553 - accuracy: 0.9789 - val_loss: 0.0465 - val_accuracy: 0.9818\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0507 - accuracy: 0.9802 - val_loss: 0.0455 - val_accuracy: 0.9822\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0479 - accuracy: 0.9814 - val_loss: 0.0415 - val_accuracy: 0.9838\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0445 - accuracy: 0.9827 - val_loss: 0.0422 - val_accuracy: 0.9837\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0418 - accuracy: 0.9835 - val_loss: 0.0397 - val_accuracy: 0.9845\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0395 - accuracy: 0.9846 - val_loss: 0.0396 - val_accuracy: 0.9846\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0381 - accuracy: 0.9850 - val_loss: 0.0387 - val_accuracy: 0.9852\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0363 - accuracy: 0.9857 - val_loss: 0.0394 - val_accuracy: 0.9849\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0342 - accuracy: 0.9865 - val_loss: 0.0374 - val_accuracy: 0.9857\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0325 - accuracy: 0.9873 - val_loss: 0.0383 - val_accuracy: 0.9854\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0314 - accuracy: 0.9874 - val_loss: 0.0392 - val_accuracy: 0.9850\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0302 - accuracy: 0.9881 - val_loss: 0.0384 - val_accuracy: 0.9856\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0287 - accuracy: 0.9885 - val_loss: 0.0380 - val_accuracy: 0.9863\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0282 - accuracy: 0.9889 - val_loss: 0.0387 - val_accuracy: 0.9861\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0269 - accuracy: 0.9893 - val_loss: 0.0406 - val_accuracy: 0.9855\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0261 - accuracy: 0.9894 - val_loss: 0.0395 - val_accuracy: 0.9858\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0251 - accuracy: 0.9899 - val_loss: 0.0384 - val_accuracy: 0.9859\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0245 - accuracy: 0.9900 - val_loss: 0.0402 - val_accuracy: 0.9858\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0237 - accuracy: 0.9904 - val_loss: 0.0420 - val_accuracy: 0.9859\n",
      "Epoch 00022: early stopping\n",
      "rho: 0.0, ll: 1.3377535343170166\n",
      "rho: 1e-05, ll: 1.3377690315246582\n",
      "rho: 5e-05, ll: 1.3378046751022339\n",
      "rho: 0.0001, ll: 1.3377944231033325\n",
      "rho: 0.0005, ll: 1.335933804512024\n",
      "rho: 0.001, ll: 1.3304733037948608\n",
      "rho: 0.005, ll: 1.242124319076538\n",
      "rho: 0.01, ll: 1.1064977645874023\n",
      "rho: 0.05, ll: 0.3454454243183136\n",
      "rho: 0.1, ll: -0.018988553434610367\n",
      "best_rho: 5e-05, best_ll: 1.3378046751022339\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1180 - accuracy: 0.9552 - val_loss: 0.0644 - val_accuracy: 0.9743\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0716 - accuracy: 0.9725 - val_loss: 0.0548 - val_accuracy: 0.9788\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0627 - accuracy: 0.9759 - val_loss: 0.0491 - val_accuracy: 0.9809\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0565 - accuracy: 0.9784 - val_loss: 0.0458 - val_accuracy: 0.9818\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0518 - accuracy: 0.9799 - val_loss: 0.0428 - val_accuracy: 0.9828\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0479 - accuracy: 0.9815 - val_loss: 0.0420 - val_accuracy: 0.9841\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0452 - accuracy: 0.9826 - val_loss: 0.0417 - val_accuracy: 0.9841\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0432 - accuracy: 0.9831 - val_loss: 0.0397 - val_accuracy: 0.9847\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0408 - accuracy: 0.9839 - val_loss: 0.0408 - val_accuracy: 0.9843\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0385 - accuracy: 0.9850 - val_loss: 0.0404 - val_accuracy: 0.9844\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0367 - accuracy: 0.9857 - val_loss: 0.0404 - val_accuracy: 0.9852\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0358 - accuracy: 0.9859 - val_loss: 0.0388 - val_accuracy: 0.9855\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0334 - accuracy: 0.9869 - val_loss: 0.0384 - val_accuracy: 0.9857\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0322 - accuracy: 0.9871 - val_loss: 0.0399 - val_accuracy: 0.9855\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0310 - accuracy: 0.9877 - val_loss: 0.0393 - val_accuracy: 0.9855\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0302 - accuracy: 0.9882 - val_loss: 0.0385 - val_accuracy: 0.9860\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0287 - accuracy: 0.9885 - val_loss: 0.0399 - val_accuracy: 0.9854\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0281 - accuracy: 0.9887 - val_loss: 0.0402 - val_accuracy: 0.9858\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0265 - accuracy: 0.9894 - val_loss: 0.0396 - val_accuracy: 0.9863\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0259 - accuracy: 0.9895 - val_loss: 0.0395 - val_accuracy: 0.9860\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0256 - accuracy: 0.9899 - val_loss: 0.0396 - val_accuracy: 0.9863\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0246 - accuracy: 0.9902 - val_loss: 0.0419 - val_accuracy: 0.9859\n",
      "Epoch 23/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0240 - accuracy: 0.9905 - val_loss: 0.0412 - val_accuracy: 0.9857\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 00023: early stopping\n",
      "rho: 0.0, ll: 1.295642375946045\n",
      "rho: 1e-05, ll: 1.2956585884094238\n",
      "rho: 5e-05, ll: 1.2957026958465576\n",
      "rho: 0.0001, ll: 1.2957103252410889\n",
      "rho: 0.0005, ll: 1.294197678565979\n",
      "rho: 0.001, ll: 1.2893959283828735\n",
      "rho: 0.005, ll: 1.2076843976974487\n",
      "rho: 0.01, ll: 1.0808016061782837\n",
      "rho: 0.05, ll: 0.35830214619636536\n",
      "rho: 0.1, ll: -0.0022738748230040073\n",
      "best_rho: 0.0001, best_ll: 1.2957103252410889\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1188 - accuracy: 0.9547 - val_loss: 0.0645 - val_accuracy: 0.9754\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0718 - accuracy: 0.9725 - val_loss: 0.0552 - val_accuracy: 0.9786\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0615 - accuracy: 0.9762 - val_loss: 0.0491 - val_accuracy: 0.9812\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0561 - accuracy: 0.9785 - val_loss: 0.0474 - val_accuracy: 0.9815\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0517 - accuracy: 0.9800 - val_loss: 0.0437 - val_accuracy: 0.9836\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0477 - accuracy: 0.9816 - val_loss: 0.0441 - val_accuracy: 0.9830\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0450 - accuracy: 0.9824 - val_loss: 0.0407 - val_accuracy: 0.9849\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0426 - accuracy: 0.9833 - val_loss: 0.0400 - val_accuracy: 0.9849\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0406 - accuracy: 0.9841 - val_loss: 0.0397 - val_accuracy: 0.9848\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0384 - accuracy: 0.9848 - val_loss: 0.0400 - val_accuracy: 0.9848\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0362 - accuracy: 0.9859 - val_loss: 0.0390 - val_accuracy: 0.9855\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0348 - accuracy: 0.9862 - val_loss: 0.0385 - val_accuracy: 0.9855\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0334 - accuracy: 0.9869 - val_loss: 0.0395 - val_accuracy: 0.9853\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0323 - accuracy: 0.9872 - val_loss: 0.0411 - val_accuracy: 0.9844\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0305 - accuracy: 0.9880 - val_loss: 0.0386 - val_accuracy: 0.9860\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0297 - accuracy: 0.9882 - val_loss: 0.0386 - val_accuracy: 0.9859\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0289 - accuracy: 0.9885 - val_loss: 0.0394 - val_accuracy: 0.9862\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0277 - accuracy: 0.9889 - val_loss: 0.0385 - val_accuracy: 0.9858\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0270 - accuracy: 0.9892 - val_loss: 0.0394 - val_accuracy: 0.9861\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0252 - accuracy: 0.9899 - val_loss: 0.0428 - val_accuracy: 0.9858\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0250 - accuracy: 0.9899 - val_loss: 0.0412 - val_accuracy: 0.9859\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0244 - accuracy: 0.9901 - val_loss: 0.0413 - val_accuracy: 0.9860\n",
      "Epoch 00022: early stopping\n",
      "rho: 0.0, ll: 1.3046590089797974\n",
      "rho: 1e-05, ll: 1.304702877998352\n",
      "rho: 5e-05, ll: 1.3048577308654785\n",
      "rho: 0.0001, ll: 1.3050060272216797\n",
      "rho: 0.0005, ll: 1.3046544790267944\n",
      "rho: 0.001, ll: 1.3013405799865723\n",
      "rho: 0.005, ll: 1.229023814201355\n",
      "rho: 0.01, ll: 1.1076772212982178\n",
      "rho: 0.05, ll: 0.37420183420181274\n",
      "rho: 0.1, ll: 0.001970567973330617\n",
      "best_rho: 0.0001, best_ll: 1.3050060272216797\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1167 - accuracy: 0.9554 - val_loss: 0.0637 - val_accuracy: 0.9750\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0713 - accuracy: 0.9725 - val_loss: 0.0526 - val_accuracy: 0.9795\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0618 - accuracy: 0.9763 - val_loss: 0.0480 - val_accuracy: 0.9812\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0562 - accuracy: 0.9784 - val_loss: 0.0452 - val_accuracy: 0.9822\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0513 - accuracy: 0.9802 - val_loss: 0.0438 - val_accuracy: 0.9825\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0481 - accuracy: 0.9814 - val_loss: 0.0411 - val_accuracy: 0.9839\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0453 - accuracy: 0.9826 - val_loss: 0.0410 - val_accuracy: 0.9840\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0423 - accuracy: 0.9832 - val_loss: 0.0380 - val_accuracy: 0.9851\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0406 - accuracy: 0.9842 - val_loss: 0.0387 - val_accuracy: 0.9850\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0381 - accuracy: 0.9850 - val_loss: 0.0371 - val_accuracy: 0.9855\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0369 - accuracy: 0.9856 - val_loss: 0.0368 - val_accuracy: 0.9859\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0349 - accuracy: 0.9862 - val_loss: 0.0369 - val_accuracy: 0.9859\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0335 - accuracy: 0.9867 - val_loss: 0.0371 - val_accuracy: 0.9858\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0323 - accuracy: 0.9872 - val_loss: 0.0384 - val_accuracy: 0.9858\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0306 - accuracy: 0.9880 - val_loss: 0.0369 - val_accuracy: 0.9865\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0302 - accuracy: 0.9879 - val_loss: 0.0377 - val_accuracy: 0.9866\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0284 - accuracy: 0.9885 - val_loss: 0.0384 - val_accuracy: 0.9868\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0275 - accuracy: 0.9890 - val_loss: 0.0377 - val_accuracy: 0.9867\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0268 - accuracy: 0.9893 - val_loss: 0.0374 - val_accuracy: 0.9870\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0259 - accuracy: 0.9899 - val_loss: 0.0378 - val_accuracy: 0.9870\n",
      "Epoch 21/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0252 - accuracy: 0.9899 - val_loss: 0.0400 - val_accuracy: 0.9865\n",
      "Epoch 00021: early stopping\n",
      "rho: 0.0, ll: 1.332130789756775\n",
      "rho: 1e-05, ll: 1.332101821899414\n",
      "rho: 5e-05, ll: 1.3319733142852783\n",
      "rho: 0.0001, ll: 1.3317832946777344\n",
      "rho: 0.0005, ll: 1.3292368650436401\n",
      "rho: 0.001, ll: 1.3240060806274414\n",
      "rho: 0.005, ll: 1.2449897527694702\n",
      "rho: 0.01, ll: 1.118821144104004\n",
      "rho: 0.05, ll: 0.37203311920166016\n",
      "rho: 0.1, ll: 0.00041608334868215024\n",
      "best_rho: 0.0, best_ll: 1.332130789756775\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1174 - accuracy: 0.9552 - val_loss: 0.0644 - val_accuracy: 0.9749\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0709 - accuracy: 0.9724 - val_loss: 0.0581 - val_accuracy: 0.9774\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0615 - accuracy: 0.9763 - val_loss: 0.0545 - val_accuracy: 0.9785\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0563 - accuracy: 0.9784 - val_loss: 0.0515 - val_accuracy: 0.9798\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0517 - accuracy: 0.9799 - val_loss: 0.0480 - val_accuracy: 0.9815\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0488 - accuracy: 0.9808 - val_loss: 0.0444 - val_accuracy: 0.9828\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0457 - accuracy: 0.9821 - val_loss: 0.0433 - val_accuracy: 0.9833\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0430 - accuracy: 0.9830 - val_loss: 0.0430 - val_accuracy: 0.9833\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0414 - accuracy: 0.9839 - val_loss: 0.0428 - val_accuracy: 0.9836\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0392 - accuracy: 0.9846 - val_loss: 0.0413 - val_accuracy: 0.9848\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0372 - accuracy: 0.9854 - val_loss: 0.0420 - val_accuracy: 0.9845\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0359 - accuracy: 0.9858 - val_loss: 0.0423 - val_accuracy: 0.9843\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0340 - accuracy: 0.9866 - val_loss: 0.0409 - val_accuracy: 0.9848\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0328 - accuracy: 0.9870 - val_loss: 0.0415 - val_accuracy: 0.9851\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0318 - accuracy: 0.9873 - val_loss: 0.0404 - val_accuracy: 0.9851\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0305 - accuracy: 0.9877 - val_loss: 0.0413 - val_accuracy: 0.9850\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0296 - accuracy: 0.9880 - val_loss: 0.0429 - val_accuracy: 0.9848\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0281 - accuracy: 0.9888 - val_loss: 0.0409 - val_accuracy: 0.9858\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0273 - accuracy: 0.9889 - val_loss: 0.0420 - val_accuracy: 0.9853\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0273 - accuracy: 0.9890 - val_loss: 0.0421 - val_accuracy: 0.9852\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0256 - accuracy: 0.9896 - val_loss: 0.0437 - val_accuracy: 0.9853\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0248 - accuracy: 0.9900 - val_loss: 0.0442 - val_accuracy: 0.9858\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0246 - accuracy: 0.9901 - val_loss: 0.0430 - val_accuracy: 0.9856\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0239 - accuracy: 0.9905 - val_loss: 0.0433 - val_accuracy: 0.9857\n",
      "Epoch 25/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0232 - accuracy: 0.9907 - val_loss: 0.0435 - val_accuracy: 0.9853\n",
      "Epoch 00025: early stopping\n",
      "rho: 0.0, ll: 1.2849841117858887\n",
      "rho: 1e-05, ll: 1.2849997282028198\n",
      "rho: 5e-05, ll: 1.2850441932678223\n",
      "rho: 0.0001, ll: 1.2850590944290161\n",
      "rho: 0.0005, ll: 1.2837880849838257\n",
      "rho: 0.001, ll: 1.279563307762146\n",
      "rho: 0.005, ll: 1.2041558027267456\n",
      "rho: 0.01, ll: 1.0837310552597046\n",
      "rho: 0.05, ll: 0.37217915058135986\n",
      "rho: 0.1, ll: 0.007833544164896011\n",
      "best_rho: 0.0001, best_ll: 1.2850590944290161\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1160 - accuracy: 0.9559 - val_loss: 0.0639 - val_accuracy: 0.9750\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0701 - accuracy: 0.9732 - val_loss: 0.0555 - val_accuracy: 0.9785\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0611 - accuracy: 0.9764 - val_loss: 0.0510 - val_accuracy: 0.9799\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0552 - accuracy: 0.9786 - val_loss: 0.0477 - val_accuracy: 0.9812\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0512 - accuracy: 0.9802 - val_loss: 0.0454 - val_accuracy: 0.9822\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0476 - accuracy: 0.9816 - val_loss: 0.0444 - val_accuracy: 0.9827\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0453 - accuracy: 0.9821 - val_loss: 0.0422 - val_accuracy: 0.9835\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0426 - accuracy: 0.9835 - val_loss: 0.0430 - val_accuracy: 0.9828\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0404 - accuracy: 0.9846 - val_loss: 0.0423 - val_accuracy: 0.9836\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0390 - accuracy: 0.9847 - val_loss: 0.0418 - val_accuracy: 0.9844\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0362 - accuracy: 0.9858 - val_loss: 0.0413 - val_accuracy: 0.9843\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0354 - accuracy: 0.9860 - val_loss: 0.0399 - val_accuracy: 0.9850\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0340 - accuracy: 0.9867 - val_loss: 0.0407 - val_accuracy: 0.9847\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0321 - accuracy: 0.9873 - val_loss: 0.0412 - val_accuracy: 0.9844\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0315 - accuracy: 0.9875 - val_loss: 0.0396 - val_accuracy: 0.9854\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0307 - accuracy: 0.9878 - val_loss: 0.0386 - val_accuracy: 0.9856\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0287 - accuracy: 0.9887 - val_loss: 0.0404 - val_accuracy: 0.9853\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0280 - accuracy: 0.9889 - val_loss: 0.0415 - val_accuracy: 0.9855\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0277 - accuracy: 0.9891 - val_loss: 0.0401 - val_accuracy: 0.9857\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0259 - accuracy: 0.9895 - val_loss: 0.0412 - val_accuracy: 0.9855\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0256 - accuracy: 0.9898 - val_loss: 0.0397 - val_accuracy: 0.9854\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0246 - accuracy: 0.9901 - val_loss: 0.0416 - val_accuracy: 0.9857\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0234 - accuracy: 0.9906 - val_loss: 0.0426 - val_accuracy: 0.9855\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0230 - accuracy: 0.9907 - val_loss: 0.0422 - val_accuracy: 0.9859\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0230 - accuracy: 0.9906 - val_loss: 0.0460 - val_accuracy: 0.9859\n",
      "Epoch 26/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0215 - accuracy: 0.9913 - val_loss: 0.0438 - val_accuracy: 0.9858\n",
      "Epoch 00026: early stopping\n",
      "rho: 0.0, ll: 1.3552788496017456\n",
      "rho: 1e-05, ll: 1.3552606105804443\n",
      "rho: 5e-05, ll: 1.3551675081253052\n",
      "rho: 0.0001, ll: 1.3550087213516235\n",
      "rho: 0.0005, ll: 1.352336049079895\n",
      "rho: 0.001, ll: 1.3464113473892212\n",
      "rho: 0.005, ll: 1.2600135803222656\n",
      "rho: 0.01, ll: 1.1290346384048462\n",
      "rho: 0.05, ll: 0.3745823800563812\n",
      "rho: 0.1, ll: 0.0004253031511325389\n",
      "best_rho: 0.0, best_ll: 1.3552788496017456\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1187 - accuracy: 0.9549 - val_loss: 0.0689 - val_accuracy: 0.9731\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0728 - accuracy: 0.9719 - val_loss: 0.0582 - val_accuracy: 0.9773\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0631 - accuracy: 0.9757 - val_loss: 0.0537 - val_accuracy: 0.9792\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0577 - accuracy: 0.9778 - val_loss: 0.0508 - val_accuracy: 0.9806\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0530 - accuracy: 0.9795 - val_loss: 0.0472 - val_accuracy: 0.9819\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0498 - accuracy: 0.9805 - val_loss: 0.0458 - val_accuracy: 0.9827\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0472 - accuracy: 0.9816 - val_loss: 0.0447 - val_accuracy: 0.9829\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0448 - accuracy: 0.9825 - val_loss: 0.0432 - val_accuracy: 0.9836\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0424 - accuracy: 0.9834 - val_loss: 0.0433 - val_accuracy: 0.9840\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0401 - accuracy: 0.9842 - val_loss: 0.0436 - val_accuracy: 0.9838\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0379 - accuracy: 0.9850 - val_loss: 0.0432 - val_accuracy: 0.9837\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0375 - accuracy: 0.9851 - val_loss: 0.0418 - val_accuracy: 0.9846\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0352 - accuracy: 0.9861 - val_loss: 0.0427 - val_accuracy: 0.9849\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0341 - accuracy: 0.9864 - val_loss: 0.0419 - val_accuracy: 0.9848\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0328 - accuracy: 0.9869 - val_loss: 0.0418 - val_accuracy: 0.9847\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0320 - accuracy: 0.9875 - val_loss: 0.0425 - val_accuracy: 0.9846\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0304 - accuracy: 0.9878 - val_loss: 0.0430 - val_accuracy: 0.9849\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0296 - accuracy: 0.9880 - val_loss: 0.0448 - val_accuracy: 0.9850\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0284 - accuracy: 0.9887 - val_loss: 0.0443 - val_accuracy: 0.9847\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0278 - accuracy: 0.9887 - val_loss: 0.0425 - val_accuracy: 0.9850\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0269 - accuracy: 0.9893 - val_loss: 0.0431 - val_accuracy: 0.9852\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0265 - accuracy: 0.9892 - val_loss: 0.0457 - val_accuracy: 0.9844\n",
      "Epoch 00022: early stopping\n",
      "rho: 0.0, ll: 1.2530548572540283\n",
      "rho: 1e-05, ll: 1.2531347274780273\n",
      "rho: 5e-05, ll: 1.2534337043762207\n",
      "rho: 0.0001, ll: 1.2537646293640137\n",
      "rho: 0.0005, ll: 1.2549585103988647\n",
      "rho: 0.001, ll: 1.2537091970443726\n",
      "rho: 0.005, ll: 1.1986042261123657\n",
      "rho: 0.01, ll: 1.0959678888320923\n",
      "rho: 0.05, ll: 0.40926823019981384\n",
      "rho: 0.1, ll: 0.029923750087618828\n",
      "best_rho: 0.0005, best_ll: 1.2549585103988647\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1233 - accuracy: 0.9526 - val_loss: 0.0645 - val_accuracy: 0.9750\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0737 - accuracy: 0.9716 - val_loss: 0.0545 - val_accuracy: 0.9793\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0642 - accuracy: 0.9753 - val_loss: 0.0496 - val_accuracy: 0.9810\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0587 - accuracy: 0.9773 - val_loss: 0.0475 - val_accuracy: 0.9817\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0544 - accuracy: 0.9790 - val_loss: 0.0454 - val_accuracy: 0.9823\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0511 - accuracy: 0.9801 - val_loss: 0.0427 - val_accuracy: 0.9836\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0480 - accuracy: 0.9812 - val_loss: 0.0424 - val_accuracy: 0.9833\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0455 - accuracy: 0.9822 - val_loss: 0.0408 - val_accuracy: 0.9838\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0427 - accuracy: 0.9834 - val_loss: 0.0410 - val_accuracy: 0.9847\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0412 - accuracy: 0.9837 - val_loss: 0.0386 - val_accuracy: 0.9853\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0396 - accuracy: 0.9845 - val_loss: 0.0382 - val_accuracy: 0.9853\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0371 - accuracy: 0.9853 - val_loss: 0.0379 - val_accuracy: 0.9857\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0351 - accuracy: 0.9860 - val_loss: 0.0371 - val_accuracy: 0.9860\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0344 - accuracy: 0.9867 - val_loss: 0.0374 - val_accuracy: 0.9861\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0331 - accuracy: 0.9870 - val_loss: 0.0379 - val_accuracy: 0.9862\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0320 - accuracy: 0.9873 - val_loss: 0.0375 - val_accuracy: 0.9867\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0311 - accuracy: 0.9875 - val_loss: 0.0377 - val_accuracy: 0.9862\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0298 - accuracy: 0.9882 - val_loss: 0.0385 - val_accuracy: 0.9858\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0289 - accuracy: 0.9885 - val_loss: 0.0379 - val_accuracy: 0.9863\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0277 - accuracy: 0.9888 - val_loss: 0.0389 - val_accuracy: 0.9868\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0267 - accuracy: 0.9893 - val_loss: 0.0400 - val_accuracy: 0.9863\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0259 - accuracy: 0.9895 - val_loss: 0.0398 - val_accuracy: 0.9870\n",
      "Epoch 23/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0252 - accuracy: 0.9900 - val_loss: 0.0395 - val_accuracy: 0.9866\n",
      "Epoch 00023: early stopping\n",
      "rho: 0.0, ll: 1.3169023990631104\n",
      "rho: 1e-05, ll: 1.3168606758117676\n",
      "rho: 5e-05, ll: 1.3166782855987549\n",
      "rho: 0.0001, ll: 1.3164169788360596\n",
      "rho: 0.0005, ll: 1.313185214996338\n",
      "rho: 0.001, ll: 1.3069401979446411\n",
      "rho: 0.005, ll: 1.220800518989563\n",
      "rho: 0.01, ll: 1.0909918546676636\n",
      "rho: 0.05, ll: 0.35771143436431885\n",
      "rho: 0.1, ll: -0.004781331401318312\n",
      "best_rho: 0.0, best_ll: 1.3169023990631104\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1182 - accuracy: 0.9549 - val_loss: 0.0650 - val_accuracy: 0.9750\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0711 - accuracy: 0.9727 - val_loss: 0.0554 - val_accuracy: 0.9783\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0615 - accuracy: 0.9765 - val_loss: 0.0509 - val_accuracy: 0.9803\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0565 - accuracy: 0.9784 - val_loss: 0.0482 - val_accuracy: 0.9812\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0519 - accuracy: 0.9801 - val_loss: 0.0454 - val_accuracy: 0.9827\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0482 - accuracy: 0.9813 - val_loss: 0.0448 - val_accuracy: 0.9827\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0456 - accuracy: 0.9824 - val_loss: 0.0423 - val_accuracy: 0.9835\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0433 - accuracy: 0.9831 - val_loss: 0.0413 - val_accuracy: 0.9842\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0406 - accuracy: 0.9842 - val_loss: 0.0425 - val_accuracy: 0.9836\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0384 - accuracy: 0.9849 - val_loss: 0.0405 - val_accuracy: 0.9844\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0367 - accuracy: 0.9855 - val_loss: 0.0415 - val_accuracy: 0.9844\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0353 - accuracy: 0.9861 - val_loss: 0.0401 - val_accuracy: 0.9845\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0334 - accuracy: 0.9868 - val_loss: 0.0405 - val_accuracy: 0.9849\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0322 - accuracy: 0.9872 - val_loss: 0.0404 - val_accuracy: 0.9852\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0308 - accuracy: 0.9879 - val_loss: 0.0395 - val_accuracy: 0.9855\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0297 - accuracy: 0.9884 - val_loss: 0.0394 - val_accuracy: 0.9856\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0292 - accuracy: 0.9884 - val_loss: 0.0397 - val_accuracy: 0.9852\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0284 - accuracy: 0.9887 - val_loss: 0.0403 - val_accuracy: 0.9857\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0267 - accuracy: 0.9893 - val_loss: 0.0408 - val_accuracy: 0.9862\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0261 - accuracy: 0.9895 - val_loss: 0.0408 - val_accuracy: 0.9856\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0257 - accuracy: 0.9897 - val_loss: 0.0406 - val_accuracy: 0.9857\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0246 - accuracy: 0.9900 - val_loss: 0.0459 - val_accuracy: 0.9853\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0244 - accuracy: 0.9903 - val_loss: 0.0442 - val_accuracy: 0.9857\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0236 - accuracy: 0.9906 - val_loss: 0.0422 - val_accuracy: 0.9860\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0232 - accuracy: 0.9906 - val_loss: 0.0437 - val_accuracy: 0.9856\n",
      "Epoch 26/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0223 - accuracy: 0.9910 - val_loss: 0.0434 - val_accuracy: 0.9860\n",
      "Epoch 00026: early stopping\n",
      "rho: 0.0, ll: 1.3534955978393555\n",
      "rho: 1e-05, ll: 1.3534430265426636\n",
      "rho: 5e-05, ll: 1.3532159328460693\n",
      "rho: 0.0001, ll: 1.352898120880127\n",
      "rho: 0.0005, ll: 1.349179983139038\n",
      "rho: 0.001, ll: 1.3423041105270386\n",
      "rho: 0.005, ll: 1.2525254487991333\n",
      "rho: 0.01, ll: 1.1208832263946533\n",
      "rho: 0.05, ll: 0.37933358550071716\n",
      "rho: 0.1, ll: 0.007631809916347265\n",
      "best_rho: 0.0, best_ll: 1.3534955978393555\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1215 - accuracy: 0.9529 - val_loss: 0.0651 - val_accuracy: 0.9744\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0739 - accuracy: 0.9715 - val_loss: 0.0569 - val_accuracy: 0.9777\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0643 - accuracy: 0.9751 - val_loss: 0.0508 - val_accuracy: 0.9804\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0585 - accuracy: 0.9775 - val_loss: 0.0471 - val_accuracy: 0.9815\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0541 - accuracy: 0.9791 - val_loss: 0.0460 - val_accuracy: 0.9815\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0505 - accuracy: 0.9805 - val_loss: 0.0432 - val_accuracy: 0.9831\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0480 - accuracy: 0.9811 - val_loss: 0.0417 - val_accuracy: 0.9833\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0451 - accuracy: 0.9825 - val_loss: 0.0412 - val_accuracy: 0.9838\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0429 - accuracy: 0.9832 - val_loss: 0.0401 - val_accuracy: 0.9848\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0411 - accuracy: 0.9839 - val_loss: 0.0408 - val_accuracy: 0.9840\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0391 - accuracy: 0.9848 - val_loss: 0.0402 - val_accuracy: 0.9847\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0374 - accuracy: 0.9853 - val_loss: 0.0402 - val_accuracy: 0.9852\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0364 - accuracy: 0.9854 - val_loss: 0.0410 - val_accuracy: 0.9847\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0348 - accuracy: 0.9862 - val_loss: 0.0391 - val_accuracy: 0.9856\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0333 - accuracy: 0.9868 - val_loss: 0.0380 - val_accuracy: 0.9860\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0325 - accuracy: 0.9871 - val_loss: 0.0392 - val_accuracy: 0.9856\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0313 - accuracy: 0.9876 - val_loss: 0.0388 - val_accuracy: 0.9856\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0302 - accuracy: 0.9879 - val_loss: 0.0393 - val_accuracy: 0.9858\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0291 - accuracy: 0.9884 - val_loss: 0.0407 - val_accuracy: 0.9859\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0281 - accuracy: 0.9887 - val_loss: 0.0387 - val_accuracy: 0.9861\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0274 - accuracy: 0.9890 - val_loss: 0.0412 - val_accuracy: 0.9849\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0267 - accuracy: 0.9893 - val_loss: 0.0402 - val_accuracy: 0.9863\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0269 - accuracy: 0.9893 - val_loss: 0.0397 - val_accuracy: 0.9857\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0252 - accuracy: 0.9899 - val_loss: 0.0404 - val_accuracy: 0.9861\n",
      "Epoch 25/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0253 - accuracy: 0.9897 - val_loss: 0.0416 - val_accuracy: 0.9861\n",
      "Epoch 00025: early stopping\n",
      "rho: 0.0, ll: 1.2866944074630737\n",
      "rho: 1e-05, ll: 1.2866816520690918\n",
      "rho: 5e-05, ll: 1.2866133451461792\n",
      "rho: 0.0001, ll: 1.2864909172058105\n",
      "rho: 0.0005, ll: 1.2842520475387573\n",
      "rho: 0.001, ll: 1.2790980339050293\n",
      "rho: 0.005, ll: 1.201280951499939\n",
      "rho: 0.01, ll: 1.0818854570388794\n",
      "rho: 0.05, ll: 0.3784029185771942\n",
      "rho: 0.1, ll: 0.01233366783708334\n",
      "best_rho: 0.0, best_ll: 1.2866944074630737\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "50000/50000 - 4s - loss: 0.1270 - accuracy: 0.9517 - val_loss: 0.0677 - val_accuracy: 0.9735\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0751 - accuracy: 0.9711 - val_loss: 0.0560 - val_accuracy: 0.9782\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0650 - accuracy: 0.9751 - val_loss: 0.0507 - val_accuracy: 0.9806\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0590 - accuracy: 0.9772 - val_loss: 0.0484 - val_accuracy: 0.9818\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0544 - accuracy: 0.9790 - val_loss: 0.0475 - val_accuracy: 0.9819\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0508 - accuracy: 0.9802 - val_loss: 0.0457 - val_accuracy: 0.9829\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0478 - accuracy: 0.9813 - val_loss: 0.0423 - val_accuracy: 0.9838\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0456 - accuracy: 0.9821 - val_loss: 0.0413 - val_accuracy: 0.9844\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0431 - accuracy: 0.9831 - val_loss: 0.0406 - val_accuracy: 0.9848\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0414 - accuracy: 0.9837 - val_loss: 0.0414 - val_accuracy: 0.9841\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0395 - accuracy: 0.9842 - val_loss: 0.0420 - val_accuracy: 0.9840\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0378 - accuracy: 0.9852 - val_loss: 0.0404 - val_accuracy: 0.9848\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0367 - accuracy: 0.9854 - val_loss: 0.0403 - val_accuracy: 0.9846\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0352 - accuracy: 0.9860 - val_loss: 0.0394 - val_accuracy: 0.9854\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0337 - accuracy: 0.9864 - val_loss: 0.0408 - val_accuracy: 0.9848\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0326 - accuracy: 0.9869 - val_loss: 0.0402 - val_accuracy: 0.9852\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0317 - accuracy: 0.9872 - val_loss: 0.0405 - val_accuracy: 0.9853\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0310 - accuracy: 0.9878 - val_loss: 0.0424 - val_accuracy: 0.9854\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0300 - accuracy: 0.9881 - val_loss: 0.0427 - val_accuracy: 0.9852\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0291 - accuracy: 0.9883 - val_loss: 0.0411 - val_accuracy: 0.9852\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0282 - accuracy: 0.9887 - val_loss: 0.0427 - val_accuracy: 0.9852\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0276 - accuracy: 0.9887 - val_loss: 0.0411 - val_accuracy: 0.9857\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0267 - accuracy: 0.9892 - val_loss: 0.0407 - val_accuracy: 0.9860\n",
      "Epoch 24/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0261 - accuracy: 0.9894 - val_loss: 0.0429 - val_accuracy: 0.9860\n",
      "Epoch 00024: early stopping\n",
      "rho: 0.0, ll: 1.191567063331604\n",
      "rho: 1e-05, ll: 1.1916372776031494\n",
      "rho: 5e-05, ll: 1.1918995380401611\n",
      "rho: 0.0001, ll: 1.192187786102295\n",
      "rho: 0.0005, ll: 1.1931324005126953\n",
      "rho: 0.001, ll: 1.1916966438293457\n",
      "rho: 0.005, ll: 1.136601209640503\n",
      "rho: 0.01, ll: 1.03666353225708\n",
      "rho: 0.05, ll: 0.3922351896762848\n",
      "rho: 0.1, ll: 0.03203491494059563\n",
      "best_rho: 0.0005, best_ll: 1.1931324005126953\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1221 - accuracy: 0.9538 - val_loss: 0.0696 - val_accuracy: 0.9727\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0729 - accuracy: 0.9720 - val_loss: 0.0599 - val_accuracy: 0.9763\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0634 - accuracy: 0.9757 - val_loss: 0.0550 - val_accuracy: 0.9787\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0575 - accuracy: 0.9778 - val_loss: 0.0509 - val_accuracy: 0.9807\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0529 - accuracy: 0.9795 - val_loss: 0.0483 - val_accuracy: 0.9812\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0498 - accuracy: 0.9808 - val_loss: 0.0494 - val_accuracy: 0.9804\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0469 - accuracy: 0.9818 - val_loss: 0.0452 - val_accuracy: 0.9828\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0442 - accuracy: 0.9828 - val_loss: 0.0440 - val_accuracy: 0.9832\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0410 - accuracy: 0.9839 - val_loss: 0.0451 - val_accuracy: 0.9827\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0396 - accuracy: 0.9843 - val_loss: 0.0435 - val_accuracy: 0.9837\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0379 - accuracy: 0.9852 - val_loss: 0.0437 - val_accuracy: 0.9841\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0359 - accuracy: 0.9857 - val_loss: 0.0441 - val_accuracy: 0.9837\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0347 - accuracy: 0.9863 - val_loss: 0.0430 - val_accuracy: 0.9841\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0333 - accuracy: 0.9869 - val_loss: 0.0421 - val_accuracy: 0.9842\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0323 - accuracy: 0.9872 - val_loss: 0.0440 - val_accuracy: 0.9839\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0303 - accuracy: 0.9879 - val_loss: 0.0430 - val_accuracy: 0.9846\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0299 - accuracy: 0.9880 - val_loss: 0.0423 - val_accuracy: 0.9850\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0289 - accuracy: 0.9886 - val_loss: 0.0440 - val_accuracy: 0.9853\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0280 - accuracy: 0.9889 - val_loss: 0.0440 - val_accuracy: 0.9854\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0268 - accuracy: 0.9892 - val_loss: 0.0428 - val_accuracy: 0.9848\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0255 - accuracy: 0.9898 - val_loss: 0.0448 - val_accuracy: 0.9851\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0246 - accuracy: 0.9902 - val_loss: 0.0455 - val_accuracy: 0.9852\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0249 - accuracy: 0.9901 - val_loss: 0.0448 - val_accuracy: 0.9853\n",
      "Epoch 24/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0237 - accuracy: 0.9905 - val_loss: 0.0450 - val_accuracy: 0.9846\n",
      "Epoch 00024: early stopping\n",
      "rho: 0.0, ll: 1.2606645822525024\n",
      "rho: 1e-05, ll: 1.260722041130066\n",
      "rho: 5e-05, ll: 1.2609308958053589\n",
      "rho: 0.0001, ll: 1.261146068572998\n",
      "rho: 0.0005, ll: 1.2613075971603394\n",
      "rho: 0.001, ll: 1.2585543394088745\n",
      "rho: 0.005, ll: 1.1895432472229004\n",
      "rho: 0.01, ll: 1.0728358030319214\n",
      "rho: 0.05, ll: 0.3646935522556305\n",
      "rho: 0.1, ll: -0.0009670854778960347\n",
      "best_rho: 0.0005, best_ll: 1.2613075971603394\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1167 - accuracy: 0.9551 - val_loss: 0.0625 - val_accuracy: 0.9767\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0694 - accuracy: 0.9733 - val_loss: 0.0543 - val_accuracy: 0.9790\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0611 - accuracy: 0.9766 - val_loss: 0.0510 - val_accuracy: 0.9804\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0552 - accuracy: 0.9787 - val_loss: 0.0478 - val_accuracy: 0.9819\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0509 - accuracy: 0.9804 - val_loss: 0.0451 - val_accuracy: 0.9827\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0480 - accuracy: 0.9812 - val_loss: 0.0440 - val_accuracy: 0.9832\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0452 - accuracy: 0.9823 - val_loss: 0.0423 - val_accuracy: 0.9842\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0425 - accuracy: 0.9834 - val_loss: 0.0410 - val_accuracy: 0.9843\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0405 - accuracy: 0.9843 - val_loss: 0.0406 - val_accuracy: 0.9850\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0381 - accuracy: 0.9850 - val_loss: 0.0411 - val_accuracy: 0.9846\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0367 - accuracy: 0.9856 - val_loss: 0.0412 - val_accuracy: 0.9852\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0348 - accuracy: 0.9864 - val_loss: 0.0383 - val_accuracy: 0.9856\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0338 - accuracy: 0.9866 - val_loss: 0.0406 - val_accuracy: 0.9847\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0320 - accuracy: 0.9875 - val_loss: 0.0404 - val_accuracy: 0.9858\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0306 - accuracy: 0.9877 - val_loss: 0.0400 - val_accuracy: 0.9855\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0296 - accuracy: 0.9882 - val_loss: 0.0426 - val_accuracy: 0.9851\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0289 - accuracy: 0.9885 - val_loss: 0.0400 - val_accuracy: 0.9862\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0271 - accuracy: 0.9891 - val_loss: 0.0422 - val_accuracy: 0.9862\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0268 - accuracy: 0.9892 - val_loss: 0.0437 - val_accuracy: 0.9858\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0256 - accuracy: 0.9898 - val_loss: 0.0397 - val_accuracy: 0.9867\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0246 - accuracy: 0.9902 - val_loss: 0.0403 - val_accuracy: 0.9866\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0240 - accuracy: 0.9904 - val_loss: 0.0422 - val_accuracy: 0.9866\n",
      "Epoch 00022: early stopping\n",
      "rho: 0.0, ll: 1.2830501794815063\n",
      "rho: 1e-05, ll: 1.283182144165039\n",
      "rho: 5e-05, ll: 1.283679485321045\n",
      "rho: 0.0001, ll: 1.2842366695404053\n",
      "rho: 0.0005, ll: 1.2866010665893555\n",
      "rho: 0.001, ll: 1.2858372926712036\n",
      "rho: 0.005, ll: 1.224582314491272\n",
      "rho: 0.01, ll: 1.1109167337417603\n",
      "rho: 0.05, ll: 0.3930380046367645\n",
      "rho: 0.1, ll: 0.016349367797374725\n",
      "best_rho: 0.0005, best_ll: 1.2866010665893555\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1302 - accuracy: 0.9495 - val_loss: 0.0639 - val_accuracy: 0.9749\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0771 - accuracy: 0.9705 - val_loss: 0.0566 - val_accuracy: 0.9779\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0677 - accuracy: 0.9741 - val_loss: 0.0506 - val_accuracy: 0.9807\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0622 - accuracy: 0.9762 - val_loss: 0.0485 - val_accuracy: 0.9815\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0575 - accuracy: 0.9777 - val_loss: 0.0462 - val_accuracy: 0.9818\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0542 - accuracy: 0.9791 - val_loss: 0.0443 - val_accuracy: 0.9829\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0513 - accuracy: 0.9801 - val_loss: 0.0437 - val_accuracy: 0.9828\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0492 - accuracy: 0.9810 - val_loss: 0.0426 - val_accuracy: 0.9832\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0471 - accuracy: 0.9817 - val_loss: 0.0399 - val_accuracy: 0.9846\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0449 - accuracy: 0.9825 - val_loss: 0.0398 - val_accuracy: 0.9847\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0428 - accuracy: 0.9830 - val_loss: 0.0399 - val_accuracy: 0.9851\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0413 - accuracy: 0.9838 - val_loss: 0.0382 - val_accuracy: 0.9854\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0397 - accuracy: 0.9843 - val_loss: 0.0384 - val_accuracy: 0.9857\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0384 - accuracy: 0.9847 - val_loss: 0.0408 - val_accuracy: 0.9849\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0374 - accuracy: 0.9852 - val_loss: 0.0376 - val_accuracy: 0.9862\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0360 - accuracy: 0.9858 - val_loss: 0.0376 - val_accuracy: 0.9861\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0349 - accuracy: 0.9860 - val_loss: 0.0389 - val_accuracy: 0.9859\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0334 - accuracy: 0.9867 - val_loss: 0.0369 - val_accuracy: 0.9865\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0328 - accuracy: 0.9868 - val_loss: 0.0391 - val_accuracy: 0.9862\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0320 - accuracy: 0.9872 - val_loss: 0.0390 - val_accuracy: 0.9855\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0313 - accuracy: 0.9873 - val_loss: 0.0395 - val_accuracy: 0.9860\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0304 - accuracy: 0.9880 - val_loss: 0.0400 - val_accuracy: 0.9863\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0301 - accuracy: 0.9879 - val_loss: 0.0390 - val_accuracy: 0.9865\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0288 - accuracy: 0.9883 - val_loss: 0.0402 - val_accuracy: 0.9862\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0283 - accuracy: 0.9886 - val_loss: 0.0398 - val_accuracy: 0.9867\n",
      "Epoch 26/100\n",
      "50000/50000 - 3s - loss: 0.0274 - accuracy: 0.9889 - val_loss: 0.0395 - val_accuracy: 0.9862\n",
      "Epoch 27/100\n",
      "50000/50000 - 3s - loss: 0.0274 - accuracy: 0.9891 - val_loss: 0.0391 - val_accuracy: 0.9866\n",
      "Epoch 28/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0262 - accuracy: 0.9894 - val_loss: 0.0413 - val_accuracy: 0.9861\n",
      "Epoch 00028: early stopping\n",
      "rho: 0.0, ll: 1.2032017707824707\n",
      "rho: 1e-05, ll: 1.2031707763671875\n",
      "rho: 5e-05, ll: 1.2030290365219116\n",
      "rho: 0.0001, ll: 1.2028136253356934\n",
      "rho: 0.0005, ll: 1.1998062133789062\n",
      "rho: 0.001, ll: 1.1936883926391602\n",
      "rho: 0.005, ll: 1.111279010772705\n",
      "rho: 0.01, ll: 0.9930861592292786\n",
      "rho: 0.05, ll: 0.33824416995048523\n",
      "rho: 0.1, ll: -0.0021382111590355635\n",
      "best_rho: 0.0, best_ll: 1.2032017707824707\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1218 - accuracy: 0.9536 - val_loss: 0.0680 - val_accuracy: 0.9735\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0728 - accuracy: 0.9719 - val_loss: 0.0583 - val_accuracy: 0.9778\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0635 - accuracy: 0.9758 - val_loss: 0.0535 - val_accuracy: 0.9793\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0575 - accuracy: 0.9778 - val_loss: 0.0492 - val_accuracy: 0.9813\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0532 - accuracy: 0.9795 - val_loss: 0.0464 - val_accuracy: 0.9818\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0499 - accuracy: 0.9807 - val_loss: 0.0450 - val_accuracy: 0.9825\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0466 - accuracy: 0.9819 - val_loss: 0.0430 - val_accuracy: 0.9834\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0448 - accuracy: 0.9824 - val_loss: 0.0437 - val_accuracy: 0.9834\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0425 - accuracy: 0.9835 - val_loss: 0.0414 - val_accuracy: 0.9843\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0403 - accuracy: 0.9841 - val_loss: 0.0415 - val_accuracy: 0.9841\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0389 - accuracy: 0.9848 - val_loss: 0.0399 - val_accuracy: 0.9844\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0371 - accuracy: 0.9855 - val_loss: 0.0386 - val_accuracy: 0.9851\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0353 - accuracy: 0.9858 - val_loss: 0.0402 - val_accuracy: 0.9852\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0344 - accuracy: 0.9866 - val_loss: 0.0393 - val_accuracy: 0.9857\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0327 - accuracy: 0.9870 - val_loss: 0.0398 - val_accuracy: 0.9855\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0321 - accuracy: 0.9870 - val_loss: 0.0403 - val_accuracy: 0.9858\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0308 - accuracy: 0.9877 - val_loss: 0.0404 - val_accuracy: 0.9856\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0298 - accuracy: 0.9882 - val_loss: 0.0383 - val_accuracy: 0.9859\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0289 - accuracy: 0.9885 - val_loss: 0.0399 - val_accuracy: 0.9854\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0276 - accuracy: 0.9889 - val_loss: 0.0400 - val_accuracy: 0.9859\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0275 - accuracy: 0.9890 - val_loss: 0.0409 - val_accuracy: 0.9858\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0263 - accuracy: 0.9895 - val_loss: 0.0420 - val_accuracy: 0.9857\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0260 - accuracy: 0.9895 - val_loss: 0.0420 - val_accuracy: 0.9860\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0249 - accuracy: 0.9900 - val_loss: 0.0420 - val_accuracy: 0.9861\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0243 - accuracy: 0.9902 - val_loss: 0.0423 - val_accuracy: 0.9858\n",
      "Epoch 26/100\n",
      "50000/50000 - 3s - loss: 0.0237 - accuracy: 0.9904 - val_loss: 0.0425 - val_accuracy: 0.9858\n",
      "Epoch 27/100\n",
      "50000/50000 - 3s - loss: 0.0232 - accuracy: 0.9906 - val_loss: 0.0434 - val_accuracy: 0.9860\n",
      "Epoch 28/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0229 - accuracy: 0.9908 - val_loss: 0.0427 - val_accuracy: 0.9856\n",
      "Epoch 00028: early stopping\n",
      "rho: 0.0, ll: 1.2746806144714355\n",
      "rho: 1e-05, ll: 1.2746384143829346\n",
      "rho: 5e-05, ll: 1.2744566202163696\n",
      "rho: 0.0001, ll: 1.274200201034546\n",
      "rho: 0.0005, ll: 1.2711507081985474\n",
      "rho: 0.001, ll: 1.2654166221618652\n",
      "rho: 0.005, ll: 1.1881088018417358\n",
      "rho: 0.01, ll: 1.072051763534546\n",
      "rho: 0.05, ll: 0.38425737619400024\n",
      "rho: 0.1, ll: 0.018839020282030106\n",
      "best_rho: 0.0, best_ll: 1.2746806144714355\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1254 - accuracy: 0.9524 - val_loss: 0.0649 - val_accuracy: 0.9748\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0741 - accuracy: 0.9717 - val_loss: 0.0567 - val_accuracy: 0.9779\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0640 - accuracy: 0.9753 - val_loss: 0.0514 - val_accuracy: 0.9798\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0586 - accuracy: 0.9774 - val_loss: 0.0482 - val_accuracy: 0.9809\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0540 - accuracy: 0.9793 - val_loss: 0.0485 - val_accuracy: 0.9807\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0505 - accuracy: 0.9806 - val_loss: 0.0447 - val_accuracy: 0.9824\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0483 - accuracy: 0.9813 - val_loss: 0.0424 - val_accuracy: 0.9830\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0455 - accuracy: 0.9823 - val_loss: 0.0418 - val_accuracy: 0.9833\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0428 - accuracy: 0.9831 - val_loss: 0.0410 - val_accuracy: 0.9838\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0410 - accuracy: 0.9839 - val_loss: 0.0403 - val_accuracy: 0.9840\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0392 - accuracy: 0.9846 - val_loss: 0.0412 - val_accuracy: 0.9842\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0379 - accuracy: 0.9850 - val_loss: 0.0393 - val_accuracy: 0.9848\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0359 - accuracy: 0.9859 - val_loss: 0.0400 - val_accuracy: 0.9844\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0350 - accuracy: 0.9861 - val_loss: 0.0393 - val_accuracy: 0.9852\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0335 - accuracy: 0.9868 - val_loss: 0.0387 - val_accuracy: 0.9852\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0327 - accuracy: 0.9871 - val_loss: 0.0390 - val_accuracy: 0.9854\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0318 - accuracy: 0.9873 - val_loss: 0.0403 - val_accuracy: 0.9847\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0307 - accuracy: 0.9878 - val_loss: 0.0409 - val_accuracy: 0.9849\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0294 - accuracy: 0.9881 - val_loss: 0.0411 - val_accuracy: 0.9854\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0286 - accuracy: 0.9886 - val_loss: 0.0415 - val_accuracy: 0.9852\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0283 - accuracy: 0.9887 - val_loss: 0.0407 - val_accuracy: 0.9856\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0272 - accuracy: 0.9890 - val_loss: 0.0410 - val_accuracy: 0.9856\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0271 - accuracy: 0.9892 - val_loss: 0.0419 - val_accuracy: 0.9856\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0262 - accuracy: 0.9894 - val_loss: 0.0411 - val_accuracy: 0.9855\n",
      "Epoch 25/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0252 - accuracy: 0.9900 - val_loss: 0.0407 - val_accuracy: 0.9855\n",
      "Epoch 00025: early stopping\n",
      "rho: 0.0, ll: 1.2300182580947876\n",
      "rho: 1e-05, ll: 1.230082631111145\n",
      "rho: 5e-05, ll: 1.2303202152252197\n",
      "rho: 0.0001, ll: 1.2305744886398315\n",
      "rho: 0.0005, ll: 1.2311359643936157\n",
      "rho: 0.001, ll: 1.2289942502975464\n",
      "rho: 0.005, ll: 1.1652830839157104\n",
      "rho: 0.01, ll: 1.0547168254852295\n",
      "rho: 0.05, ll: 0.37367284297943115\n",
      "rho: 0.1, ll: 0.012066978961229324\n",
      "best_rho: 0.0005, best_ll: 1.2311359643936157\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1224 - accuracy: 0.9532 - val_loss: 0.0663 - val_accuracy: 0.9741\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0726 - accuracy: 0.9724 - val_loss: 0.0583 - val_accuracy: 0.9773\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0630 - accuracy: 0.9758 - val_loss: 0.0552 - val_accuracy: 0.9783\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0574 - accuracy: 0.9778 - val_loss: 0.0504 - val_accuracy: 0.9801\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0533 - accuracy: 0.9794 - val_loss: 0.0467 - val_accuracy: 0.9819\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0496 - accuracy: 0.9807 - val_loss: 0.0458 - val_accuracy: 0.9823\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0470 - accuracy: 0.9818 - val_loss: 0.0455 - val_accuracy: 0.9824\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0443 - accuracy: 0.9828 - val_loss: 0.0446 - val_accuracy: 0.9832\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0426 - accuracy: 0.9835 - val_loss: 0.0437 - val_accuracy: 0.9833\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0408 - accuracy: 0.9841 - val_loss: 0.0445 - val_accuracy: 0.9830\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0392 - accuracy: 0.9846 - val_loss: 0.0419 - val_accuracy: 0.9842\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0372 - accuracy: 0.9855 - val_loss: 0.0412 - val_accuracy: 0.9842\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0357 - accuracy: 0.9858 - val_loss: 0.0429 - val_accuracy: 0.9844\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0341 - accuracy: 0.9864 - val_loss: 0.0412 - val_accuracy: 0.9847\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0330 - accuracy: 0.9868 - val_loss: 0.0424 - val_accuracy: 0.9845\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0315 - accuracy: 0.9876 - val_loss: 0.0426 - val_accuracy: 0.9846\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0305 - accuracy: 0.9879 - val_loss: 0.0410 - val_accuracy: 0.9850\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0300 - accuracy: 0.9879 - val_loss: 0.0429 - val_accuracy: 0.9843\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0291 - accuracy: 0.9884 - val_loss: 0.0448 - val_accuracy: 0.9843\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0277 - accuracy: 0.9889 - val_loss: 0.0455 - val_accuracy: 0.9854\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0272 - accuracy: 0.9892 - val_loss: 0.0450 - val_accuracy: 0.9850\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0266 - accuracy: 0.9893 - val_loss: 0.0444 - val_accuracy: 0.9852\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0259 - accuracy: 0.9896 - val_loss: 0.0436 - val_accuracy: 0.9850\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0253 - accuracy: 0.9899 - val_loss: 0.0440 - val_accuracy: 0.9855\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0247 - accuracy: 0.9902 - val_loss: 0.0451 - val_accuracy: 0.9858\n",
      "Epoch 26/100\n",
      "50000/50000 - 3s - loss: 0.0238 - accuracy: 0.9905 - val_loss: 0.0464 - val_accuracy: 0.9855\n",
      "Epoch 27/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0229 - accuracy: 0.9905 - val_loss: 0.0481 - val_accuracy: 0.9850\n",
      "Epoch 00027: early stopping\n",
      "rho: 0.0, ll: 1.2432729005813599\n",
      "rho: 1e-05, ll: 1.2432529926300049\n",
      "rho: 5e-05, ll: 1.2431561946868896\n",
      "rho: 0.0001, ll: 1.2429977655410767\n",
      "rho: 0.0005, ll: 1.2404546737670898\n",
      "rho: 0.001, ll: 1.234852910041809\n",
      "rho: 0.005, ll: 1.1526578664779663\n",
      "rho: 0.01, ll: 1.0297152996063232\n",
      "rho: 0.05, ll: 0.3380157947540283\n",
      "rho: 0.1, ll: -0.012889405712485313\n",
      "best_rho: 0.0, best_ll: 1.2432729005813599\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1181 - accuracy: 0.9548 - val_loss: 0.0681 - val_accuracy: 0.9733\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0719 - accuracy: 0.9725 - val_loss: 0.0572 - val_accuracy: 0.9776\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0620 - accuracy: 0.9762 - val_loss: 0.0529 - val_accuracy: 0.9793\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0566 - accuracy: 0.9781 - val_loss: 0.0487 - val_accuracy: 0.9808\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0523 - accuracy: 0.9798 - val_loss: 0.0506 - val_accuracy: 0.9802\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0496 - accuracy: 0.9807 - val_loss: 0.0459 - val_accuracy: 0.9818\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0463 - accuracy: 0.9820 - val_loss: 0.0440 - val_accuracy: 0.9829\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0438 - accuracy: 0.9827 - val_loss: 0.0436 - val_accuracy: 0.9831\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0423 - accuracy: 0.9835 - val_loss: 0.0447 - val_accuracy: 0.9821\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0397 - accuracy: 0.9844 - val_loss: 0.0432 - val_accuracy: 0.9835\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0382 - accuracy: 0.9850 - val_loss: 0.0419 - val_accuracy: 0.9837\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0364 - accuracy: 0.9856 - val_loss: 0.0430 - val_accuracy: 0.9837\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0351 - accuracy: 0.9862 - val_loss: 0.0413 - val_accuracy: 0.9847\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0332 - accuracy: 0.9868 - val_loss: 0.0415 - val_accuracy: 0.9843\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0327 - accuracy: 0.9871 - val_loss: 0.0428 - val_accuracy: 0.9850\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0316 - accuracy: 0.9874 - val_loss: 0.0422 - val_accuracy: 0.9847\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0302 - accuracy: 0.9878 - val_loss: 0.0421 - val_accuracy: 0.9849\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0292 - accuracy: 0.9882 - val_loss: 0.0421 - val_accuracy: 0.9850\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0283 - accuracy: 0.9885 - val_loss: 0.0418 - val_accuracy: 0.9849\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0275 - accuracy: 0.9890 - val_loss: 0.0418 - val_accuracy: 0.9851\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0262 - accuracy: 0.9895 - val_loss: 0.0416 - val_accuracy: 0.9854\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0253 - accuracy: 0.9898 - val_loss: 0.0451 - val_accuracy: 0.9847\n",
      "Epoch 23/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0249 - accuracy: 0.9899 - val_loss: 0.0452 - val_accuracy: 0.9849\n",
      "Epoch 00023: early stopping\n",
      "rho: 0.0, ll: 1.3222565650939941\n",
      "rho: 1e-05, ll: 1.3222249746322632\n",
      "rho: 5e-05, ll: 1.3220821619033813\n",
      "rho: 0.0001, ll: 1.3218693733215332\n",
      "rho: 0.0005, ll: 1.3190034627914429\n",
      "rho: 0.001, ll: 1.3132330179214478\n",
      "rho: 0.005, ll: 1.2319506406784058\n",
      "rho: 0.01, ll: 1.1078965663909912\n",
      "rho: 0.05, ll: 0.3836468756198883\n",
      "rho: 0.1, ll: 0.012913638725876808\n",
      "best_rho: 0.0, best_ll: 1.3222565650939941\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1223 - accuracy: 0.9533 - val_loss: 0.0666 - val_accuracy: 0.9740\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0741 - accuracy: 0.9715 - val_loss: 0.0582 - val_accuracy: 0.9767\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0652 - accuracy: 0.9747 - val_loss: 0.0524 - val_accuracy: 0.9795\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0594 - accuracy: 0.9771 - val_loss: 0.0494 - val_accuracy: 0.9810\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0552 - accuracy: 0.9786 - val_loss: 0.0464 - val_accuracy: 0.9821\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0519 - accuracy: 0.9799 - val_loss: 0.0449 - val_accuracy: 0.9828\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0489 - accuracy: 0.9808 - val_loss: 0.0443 - val_accuracy: 0.9830\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0460 - accuracy: 0.9819 - val_loss: 0.0418 - val_accuracy: 0.9841\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0440 - accuracy: 0.9828 - val_loss: 0.0421 - val_accuracy: 0.9842\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0418 - accuracy: 0.9834 - val_loss: 0.0410 - val_accuracy: 0.9843\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0396 - accuracy: 0.9844 - val_loss: 0.0394 - val_accuracy: 0.9851\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0386 - accuracy: 0.9845 - val_loss: 0.0393 - val_accuracy: 0.9857\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0370 - accuracy: 0.9854 - val_loss: 0.0415 - val_accuracy: 0.9848\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0356 - accuracy: 0.9858 - val_loss: 0.0394 - val_accuracy: 0.9854\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0342 - accuracy: 0.9862 - val_loss: 0.0399 - val_accuracy: 0.9858\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0330 - accuracy: 0.9869 - val_loss: 0.0385 - val_accuracy: 0.9860\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0321 - accuracy: 0.9872 - val_loss: 0.0402 - val_accuracy: 0.9859\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0307 - accuracy: 0.9879 - val_loss: 0.0402 - val_accuracy: 0.9861\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0297 - accuracy: 0.9879 - val_loss: 0.0389 - val_accuracy: 0.9867\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0290 - accuracy: 0.9882 - val_loss: 0.0418 - val_accuracy: 0.9858\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0279 - accuracy: 0.9887 - val_loss: 0.0403 - val_accuracy: 0.9865\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0275 - accuracy: 0.9890 - val_loss: 0.0413 - val_accuracy: 0.9866\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0268 - accuracy: 0.9891 - val_loss: 0.0389 - val_accuracy: 0.9868\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0262 - accuracy: 0.9895 - val_loss: 0.0399 - val_accuracy: 0.9867\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0252 - accuracy: 0.9899 - val_loss: 0.0409 - val_accuracy: 0.9868\n",
      "Epoch 26/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0247 - accuracy: 0.9899 - val_loss: 0.0414 - val_accuracy: 0.9866\n",
      "Epoch 00026: early stopping\n",
      "rho: 0.0, ll: 1.2574812173843384\n",
      "rho: 1e-05, ll: 1.257466197013855\n",
      "rho: 5e-05, ll: 1.257389783859253\n",
      "rho: 0.0001, ll: 1.2572582960128784\n",
      "rho: 0.0005, ll: 1.2549821138381958\n",
      "rho: 0.001, ll: 1.249832272529602\n",
      "rho: 0.005, ll: 1.1732041835784912\n",
      "rho: 0.01, ll: 1.0567113161087036\n",
      "rho: 0.05, ll: 0.3749525249004364\n",
      "rho: 0.1, ll: 0.016339588910341263\n",
      "best_rho: 0.0, best_ll: 1.2574812173843384\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1211 - accuracy: 0.9536 - val_loss: 0.0650 - val_accuracy: 0.9754\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0727 - accuracy: 0.9720 - val_loss: 0.0552 - val_accuracy: 0.9792\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0632 - accuracy: 0.9758 - val_loss: 0.0511 - val_accuracy: 0.9811\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0571 - accuracy: 0.9781 - val_loss: 0.0483 - val_accuracy: 0.9818\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0530 - accuracy: 0.9795 - val_loss: 0.0442 - val_accuracy: 0.9827\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0487 - accuracy: 0.9811 - val_loss: 0.0429 - val_accuracy: 0.9839\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0459 - accuracy: 0.9822 - val_loss: 0.0410 - val_accuracy: 0.9843\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0430 - accuracy: 0.9834 - val_loss: 0.0408 - val_accuracy: 0.9845\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0411 - accuracy: 0.9839 - val_loss: 0.0402 - val_accuracy: 0.9845\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0391 - accuracy: 0.9847 - val_loss: 0.0398 - val_accuracy: 0.9850\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0373 - accuracy: 0.9854 - val_loss: 0.0387 - val_accuracy: 0.9852\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0354 - accuracy: 0.9859 - val_loss: 0.0390 - val_accuracy: 0.9852\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0342 - accuracy: 0.9864 - val_loss: 0.0413 - val_accuracy: 0.9847\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0326 - accuracy: 0.9871 - val_loss: 0.0403 - val_accuracy: 0.9851\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0317 - accuracy: 0.9872 - val_loss: 0.0392 - val_accuracy: 0.9859\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0306 - accuracy: 0.9878 - val_loss: 0.0386 - val_accuracy: 0.9856\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0289 - accuracy: 0.9885 - val_loss: 0.0406 - val_accuracy: 0.9853\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0278 - accuracy: 0.9889 - val_loss: 0.0402 - val_accuracy: 0.9857\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0274 - accuracy: 0.9889 - val_loss: 0.0387 - val_accuracy: 0.9857\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0265 - accuracy: 0.9894 - val_loss: 0.0395 - val_accuracy: 0.9859\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0253 - accuracy: 0.9898 - val_loss: 0.0423 - val_accuracy: 0.9852\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0251 - accuracy: 0.9899 - val_loss: 0.0415 - val_accuracy: 0.9858\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0245 - accuracy: 0.9901 - val_loss: 0.0412 - val_accuracy: 0.9858\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0240 - accuracy: 0.9903 - val_loss: 0.0433 - val_accuracy: 0.9855\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0228 - accuracy: 0.9908 - val_loss: 0.0424 - val_accuracy: 0.9860\n",
      "Epoch 26/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0229 - accuracy: 0.9908 - val_loss: 0.0436 - val_accuracy: 0.9856\n",
      "Epoch 00026: early stopping\n",
      "rho: 0.0, ll: 1.3251339197158813\n",
      "rho: 1e-05, ll: 1.3250455856323242\n",
      "rho: 5e-05, ll: 1.3246759176254272\n",
      "rho: 0.0001, ll: 1.32417893409729\n",
      "rho: 0.0005, ll: 1.319019079208374\n",
      "rho: 0.001, ll: 1.3103700876235962\n",
      "rho: 0.005, ll: 1.2093369960784912\n",
      "rho: 0.01, ll: 1.0690836906433105\n",
      "rho: 0.05, ll: 0.3276386260986328\n",
      "rho: 0.1, ll: -0.02442021295428276\n",
      "best_rho: 0.0, best_ll: 1.3251339197158813\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1207 - accuracy: 0.9539 - val_loss: 0.0656 - val_accuracy: 0.9743\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0713 - accuracy: 0.9725 - val_loss: 0.0555 - val_accuracy: 0.9777\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0620 - accuracy: 0.9761 - val_loss: 0.0493 - val_accuracy: 0.9799\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0556 - accuracy: 0.9786 - val_loss: 0.0466 - val_accuracy: 0.9810\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0512 - accuracy: 0.9803 - val_loss: 0.0439 - val_accuracy: 0.9825\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0476 - accuracy: 0.9814 - val_loss: 0.0429 - val_accuracy: 0.9830\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0445 - accuracy: 0.9826 - val_loss: 0.0417 - val_accuracy: 0.9833\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0423 - accuracy: 0.9833 - val_loss: 0.0397 - val_accuracy: 0.9845\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0397 - accuracy: 0.9845 - val_loss: 0.0406 - val_accuracy: 0.9838\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0380 - accuracy: 0.9852 - val_loss: 0.0400 - val_accuracy: 0.9842\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0366 - accuracy: 0.9855 - val_loss: 0.0394 - val_accuracy: 0.9848\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0346 - accuracy: 0.9865 - val_loss: 0.0383 - val_accuracy: 0.9851\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0327 - accuracy: 0.9870 - val_loss: 0.0390 - val_accuracy: 0.9854\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0318 - accuracy: 0.9875 - val_loss: 0.0388 - val_accuracy: 0.9857\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0306 - accuracy: 0.9879 - val_loss: 0.0395 - val_accuracy: 0.9853\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0292 - accuracy: 0.9882 - val_loss: 0.0405 - val_accuracy: 0.9858\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0284 - accuracy: 0.9887 - val_loss: 0.0396 - val_accuracy: 0.9858\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0273 - accuracy: 0.9892 - val_loss: 0.0388 - val_accuracy: 0.9857\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0261 - accuracy: 0.9895 - val_loss: 0.0402 - val_accuracy: 0.9859\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0254 - accuracy: 0.9899 - val_loss: 0.0394 - val_accuracy: 0.9857\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0246 - accuracy: 0.9901 - val_loss: 0.0407 - val_accuracy: 0.9858\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0240 - accuracy: 0.9903 - val_loss: 0.0422 - val_accuracy: 0.9856\n",
      "Epoch 00022: early stopping\n",
      "rho: 0.0, ll: 1.331358551979065\n",
      "rho: 1e-05, ll: 1.3313252925872803\n",
      "rho: 5e-05, ll: 1.3311792612075806\n",
      "rho: 0.0001, ll: 1.3309690952301025\n",
      "rho: 0.0005, ll: 1.328320026397705\n",
      "rho: 0.001, ll: 1.3230926990509033\n",
      "rho: 0.005, ll: 1.247099757194519\n",
      "rho: 0.01, ll: 1.1271746158599854\n",
      "rho: 0.05, ll: 0.39673304557800293\n",
      "rho: 0.1, ll: 0.017098749056458473\n",
      "best_rho: 0.0, best_ll: 1.331358551979065\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1244 - accuracy: 0.9526 - val_loss: 0.0669 - val_accuracy: 0.9742\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0749 - accuracy: 0.9714 - val_loss: 0.0552 - val_accuracy: 0.9783\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0659 - accuracy: 0.9747 - val_loss: 0.0502 - val_accuracy: 0.9807\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0599 - accuracy: 0.9768 - val_loss: 0.0499 - val_accuracy: 0.9806\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0558 - accuracy: 0.9785 - val_loss: 0.0442 - val_accuracy: 0.9824\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0525 - accuracy: 0.9796 - val_loss: 0.0424 - val_accuracy: 0.9834\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0497 - accuracy: 0.9807 - val_loss: 0.0410 - val_accuracy: 0.9840\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0469 - accuracy: 0.9820 - val_loss: 0.0403 - val_accuracy: 0.9840\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0451 - accuracy: 0.9825 - val_loss: 0.0412 - val_accuracy: 0.9835\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0429 - accuracy: 0.9831 - val_loss: 0.0396 - val_accuracy: 0.9847\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0410 - accuracy: 0.9840 - val_loss: 0.0395 - val_accuracy: 0.9842\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0394 - accuracy: 0.9847 - val_loss: 0.0375 - val_accuracy: 0.9854\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0378 - accuracy: 0.9850 - val_loss: 0.0385 - val_accuracy: 0.9852\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0371 - accuracy: 0.9854 - val_loss: 0.0381 - val_accuracy: 0.9854\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0353 - accuracy: 0.9861 - val_loss: 0.0373 - val_accuracy: 0.9856\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0339 - accuracy: 0.9866 - val_loss: 0.0376 - val_accuracy: 0.9855\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0331 - accuracy: 0.9870 - val_loss: 0.0386 - val_accuracy: 0.9857\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0320 - accuracy: 0.9874 - val_loss: 0.0371 - val_accuracy: 0.9862\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0313 - accuracy: 0.9877 - val_loss: 0.0384 - val_accuracy: 0.9854\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0300 - accuracy: 0.9881 - val_loss: 0.0397 - val_accuracy: 0.9856\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0293 - accuracy: 0.9884 - val_loss: 0.0393 - val_accuracy: 0.9858\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0283 - accuracy: 0.9887 - val_loss: 0.0389 - val_accuracy: 0.9858\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0272 - accuracy: 0.9891 - val_loss: 0.0388 - val_accuracy: 0.9862\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0265 - accuracy: 0.9893 - val_loss: 0.0394 - val_accuracy: 0.9861\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0259 - accuracy: 0.9895 - val_loss: 0.0398 - val_accuracy: 0.9859\n",
      "Epoch 26/100\n",
      "50000/50000 - 3s - loss: 0.0255 - accuracy: 0.9898 - val_loss: 0.0393 - val_accuracy: 0.9858\n",
      "Epoch 27/100\n",
      "50000/50000 - 3s - loss: 0.0251 - accuracy: 0.9898 - val_loss: 0.0405 - val_accuracy: 0.9859\n",
      "Epoch 28/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0242 - accuracy: 0.9904 - val_loss: 0.0402 - val_accuracy: 0.9861\n",
      "Epoch 00028: early stopping\n",
      "rho: 0.0, ll: 1.2482659816741943\n",
      "rho: 1e-05, ll: 1.2483035326004028\n",
      "rho: 5e-05, ll: 1.248428463935852\n",
      "rho: 0.0001, ll: 1.2485305070877075\n",
      "rho: 0.0005, ll: 1.2475535869598389\n",
      "rho: 0.001, ll: 1.243093729019165\n",
      "rho: 0.005, ll: 1.1622505187988281\n",
      "rho: 0.01, ll: 1.0389398336410522\n",
      "rho: 0.05, ll: 0.3487269878387451\n",
      "rho: 0.1, ll: -0.0029104133136570454\n",
      "best_rho: 0.0001, best_ll: 1.2485305070877075\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1194 - accuracy: 0.9550 - val_loss: 0.0667 - val_accuracy: 0.9736\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0717 - accuracy: 0.9725 - val_loss: 0.0582 - val_accuracy: 0.9765\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0621 - accuracy: 0.9762 - val_loss: 0.0535 - val_accuracy: 0.9793\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0563 - accuracy: 0.9783 - val_loss: 0.0489 - val_accuracy: 0.9810\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0528 - accuracy: 0.9797 - val_loss: 0.0476 - val_accuracy: 0.9814\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0489 - accuracy: 0.9809 - val_loss: 0.0456 - val_accuracy: 0.9820\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0455 - accuracy: 0.9823 - val_loss: 0.0432 - val_accuracy: 0.9833\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0434 - accuracy: 0.9831 - val_loss: 0.0426 - val_accuracy: 0.9835\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0409 - accuracy: 0.9841 - val_loss: 0.0408 - val_accuracy: 0.9843\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0387 - accuracy: 0.9848 - val_loss: 0.0414 - val_accuracy: 0.9839\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0372 - accuracy: 0.9854 - val_loss: 0.0395 - val_accuracy: 0.9849\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0356 - accuracy: 0.9860 - val_loss: 0.0414 - val_accuracy: 0.9845\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0340 - accuracy: 0.9866 - val_loss: 0.0413 - val_accuracy: 0.9847\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0324 - accuracy: 0.9871 - val_loss: 0.0407 - val_accuracy: 0.9848\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0308 - accuracy: 0.9876 - val_loss: 0.0406 - val_accuracy: 0.9846\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0296 - accuracy: 0.9881 - val_loss: 0.0399 - val_accuracy: 0.9854\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0282 - accuracy: 0.9886 - val_loss: 0.0386 - val_accuracy: 0.9858\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0277 - accuracy: 0.9889 - val_loss: 0.0416 - val_accuracy: 0.9852\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0267 - accuracy: 0.9893 - val_loss: 0.0413 - val_accuracy: 0.9862\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0258 - accuracy: 0.9897 - val_loss: 0.0408 - val_accuracy: 0.9856\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0251 - accuracy: 0.9898 - val_loss: 0.0439 - val_accuracy: 0.9852\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0239 - accuracy: 0.9903 - val_loss: 0.0431 - val_accuracy: 0.9854\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0239 - accuracy: 0.9905 - val_loss: 0.0454 - val_accuracy: 0.9848\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0228 - accuracy: 0.9909 - val_loss: 0.0457 - val_accuracy: 0.9852\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0224 - accuracy: 0.9909 - val_loss: 0.0462 - val_accuracy: 0.9847\n",
      "Epoch 26/100\n",
      "50000/50000 - 3s - loss: 0.0220 - accuracy: 0.9911 - val_loss: 0.0458 - val_accuracy: 0.9852\n",
      "Epoch 27/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0213 - accuracy: 0.9913 - val_loss: 0.0434 - val_accuracy: 0.9858\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 00027: early stopping\n",
      "rho: 0.0, ll: 1.3492050170898438\n",
      "rho: 1e-05, ll: 1.3491398096084595\n",
      "rho: 5e-05, ll: 1.3488610982894897\n",
      "rho: 0.0001, ll: 1.3484739065170288\n",
      "rho: 0.0005, ll: 1.3440715074539185\n",
      "rho: 0.001, ll: 1.3361480236053467\n",
      "rho: 0.005, ll: 1.2370405197143555\n",
      "rho: 0.01, ll: 1.0956997871398926\n",
      "rho: 0.05, ll: 0.3412929177284241\n",
      "rho: 0.1, ll: -0.01609661430120468\n",
      "best_rho: 0.0, best_ll: 1.3492050170898438\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1241 - accuracy: 0.9524 - val_loss: 0.0708 - val_accuracy: 0.9712\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0745 - accuracy: 0.9714 - val_loss: 0.0582 - val_accuracy: 0.9767\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0651 - accuracy: 0.9752 - val_loss: 0.0522 - val_accuracy: 0.9797\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0593 - accuracy: 0.9771 - val_loss: 0.0491 - val_accuracy: 0.9803\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0552 - accuracy: 0.9785 - val_loss: 0.0458 - val_accuracy: 0.9821\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0512 - accuracy: 0.9801 - val_loss: 0.0441 - val_accuracy: 0.9826\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0483 - accuracy: 0.9810 - val_loss: 0.0440 - val_accuracy: 0.9824\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0460 - accuracy: 0.9824 - val_loss: 0.0429 - val_accuracy: 0.9829\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0438 - accuracy: 0.9827 - val_loss: 0.0418 - val_accuracy: 0.9839\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0421 - accuracy: 0.9836 - val_loss: 0.0407 - val_accuracy: 0.9842\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0402 - accuracy: 0.9842 - val_loss: 0.0394 - val_accuracy: 0.9848\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0390 - accuracy: 0.9846 - val_loss: 0.0409 - val_accuracy: 0.9838\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0373 - accuracy: 0.9852 - val_loss: 0.0389 - val_accuracy: 0.9851\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0359 - accuracy: 0.9856 - val_loss: 0.0403 - val_accuracy: 0.9847\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0347 - accuracy: 0.9861 - val_loss: 0.0398 - val_accuracy: 0.9849\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0338 - accuracy: 0.9865 - val_loss: 0.0400 - val_accuracy: 0.9848\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0325 - accuracy: 0.9871 - val_loss: 0.0405 - val_accuracy: 0.9849\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0309 - accuracy: 0.9875 - val_loss: 0.0398 - val_accuracy: 0.9851\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0299 - accuracy: 0.9880 - val_loss: 0.0401 - val_accuracy: 0.9854\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0297 - accuracy: 0.9879 - val_loss: 0.0402 - val_accuracy: 0.9860\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0289 - accuracy: 0.9885 - val_loss: 0.0404 - val_accuracy: 0.9857\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0284 - accuracy: 0.9887 - val_loss: 0.0404 - val_accuracy: 0.9855\n",
      "Epoch 23/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0276 - accuracy: 0.9887 - val_loss: 0.0392 - val_accuracy: 0.9856\n",
      "Epoch 00023: early stopping\n",
      "rho: 0.0, ll: 1.2442858219146729\n",
      "rho: 1e-05, ll: 1.244268774986267\n",
      "rho: 5e-05, ll: 1.2441861629486084\n",
      "rho: 0.0001, ll: 1.2440507411956787\n",
      "rho: 0.0005, ll: 1.2418595552444458\n",
      "rho: 0.001, ll: 1.2369751930236816\n",
      "rho: 0.005, ll: 1.162076711654663\n",
      "rho: 0.01, ll: 1.0452723503112793\n",
      "rho: 0.05, ll: 0.3614712357521057\n",
      "rho: 0.1, ll: 0.005316780414432287\n",
      "best_rho: 0.0, best_ll: 1.2442858219146729\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1174 - accuracy: 0.9555 - val_loss: 0.0642 - val_accuracy: 0.9748\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0692 - accuracy: 0.9735 - val_loss: 0.0573 - val_accuracy: 0.9774\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0601 - accuracy: 0.9766 - val_loss: 0.0506 - val_accuracy: 0.9802\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0548 - accuracy: 0.9790 - val_loss: 0.0479 - val_accuracy: 0.9816\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0503 - accuracy: 0.9807 - val_loss: 0.0476 - val_accuracy: 0.9815\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0469 - accuracy: 0.9816 - val_loss: 0.0439 - val_accuracy: 0.9830\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0433 - accuracy: 0.9831 - val_loss: 0.0438 - val_accuracy: 0.9829\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0413 - accuracy: 0.9837 - val_loss: 0.0445 - val_accuracy: 0.9830\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0388 - accuracy: 0.9847 - val_loss: 0.0437 - val_accuracy: 0.9833\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0370 - accuracy: 0.9855 - val_loss: 0.0418 - val_accuracy: 0.9844\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0354 - accuracy: 0.9860 - val_loss: 0.0436 - val_accuracy: 0.9840\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0335 - accuracy: 0.9867 - val_loss: 0.0415 - val_accuracy: 0.9847\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0321 - accuracy: 0.9871 - val_loss: 0.0424 - val_accuracy: 0.9844\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0307 - accuracy: 0.9878 - val_loss: 0.0411 - val_accuracy: 0.9850\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0301 - accuracy: 0.9882 - val_loss: 0.0426 - val_accuracy: 0.9846\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0287 - accuracy: 0.9887 - val_loss: 0.0410 - val_accuracy: 0.9851\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0268 - accuracy: 0.9893 - val_loss: 0.0445 - val_accuracy: 0.9844\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0265 - accuracy: 0.9894 - val_loss: 0.0438 - val_accuracy: 0.9850\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0251 - accuracy: 0.9899 - val_loss: 0.0456 - val_accuracy: 0.9849\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0246 - accuracy: 0.9902 - val_loss: 0.0439 - val_accuracy: 0.9854\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0235 - accuracy: 0.9904 - val_loss: 0.0451 - val_accuracy: 0.9850\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0231 - accuracy: 0.9907 - val_loss: 0.0459 - val_accuracy: 0.9853\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0223 - accuracy: 0.9910 - val_loss: 0.0449 - val_accuracy: 0.9852\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0221 - accuracy: 0.9910 - val_loss: 0.0445 - val_accuracy: 0.9858\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0207 - accuracy: 0.9915 - val_loss: 0.0471 - val_accuracy: 0.9854\n",
      "Epoch 26/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0202 - accuracy: 0.9918 - val_loss: 0.0470 - val_accuracy: 0.9860\n",
      "Epoch 00026: early stopping\n",
      "rho: 0.0, ll: 1.3677822351455688\n",
      "rho: 1e-05, ll: 1.3677382469177246\n",
      "rho: 5e-05, ll: 1.367545485496521\n",
      "rho: 0.0001, ll: 1.3672667741775513\n",
      "rho: 0.0005, ll: 1.3637588024139404\n",
      "rho: 0.001, ll: 1.3569183349609375\n",
      "rho: 0.005, ll: 1.2630122900009155\n",
      "rho: 0.01, ll: 1.123369812965393\n",
      "rho: 0.05, ll: 0.3556321859359741\n",
      "rho: 0.1, ll: -0.009769909083843231\n",
      "best_rho: 0.0, best_ll: 1.3677822351455688\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1230 - accuracy: 0.9528 - val_loss: 0.0639 - val_accuracy: 0.9755\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0725 - accuracy: 0.9724 - val_loss: 0.0550 - val_accuracy: 0.9789\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0632 - accuracy: 0.9758 - val_loss: 0.0507 - val_accuracy: 0.9807\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0574 - accuracy: 0.9780 - val_loss: 0.0486 - val_accuracy: 0.9814\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0527 - accuracy: 0.9797 - val_loss: 0.0459 - val_accuracy: 0.9822\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0491 - accuracy: 0.9811 - val_loss: 0.0441 - val_accuracy: 0.9832\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0461 - accuracy: 0.9821 - val_loss: 0.0424 - val_accuracy: 0.9840\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0443 - accuracy: 0.9828 - val_loss: 0.0427 - val_accuracy: 0.9838\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0415 - accuracy: 0.9838 - val_loss: 0.0406 - val_accuracy: 0.9844\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0393 - accuracy: 0.9847 - val_loss: 0.0419 - val_accuracy: 0.9841\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0378 - accuracy: 0.9851 - val_loss: 0.0402 - val_accuracy: 0.9846\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0359 - accuracy: 0.9857 - val_loss: 0.0419 - val_accuracy: 0.9842\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0345 - accuracy: 0.9864 - val_loss: 0.0400 - val_accuracy: 0.9850\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0331 - accuracy: 0.9869 - val_loss: 0.0406 - val_accuracy: 0.9850\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0312 - accuracy: 0.9876 - val_loss: 0.0415 - val_accuracy: 0.9849\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0309 - accuracy: 0.9876 - val_loss: 0.0409 - val_accuracy: 0.9851\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0299 - accuracy: 0.9882 - val_loss: 0.0410 - val_accuracy: 0.9854\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0288 - accuracy: 0.9884 - val_loss: 0.0412 - val_accuracy: 0.9859\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0275 - accuracy: 0.9890 - val_loss: 0.0396 - val_accuracy: 0.9858\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0265 - accuracy: 0.9894 - val_loss: 0.0418 - val_accuracy: 0.9859\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0260 - accuracy: 0.9895 - val_loss: 0.0434 - val_accuracy: 0.9851\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0255 - accuracy: 0.9899 - val_loss: 0.0424 - val_accuracy: 0.9858\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0247 - accuracy: 0.9901 - val_loss: 0.0432 - val_accuracy: 0.9857\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0239 - accuracy: 0.9903 - val_loss: 0.0427 - val_accuracy: 0.9861\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0234 - accuracy: 0.9905 - val_loss: 0.0450 - val_accuracy: 0.9853\n",
      "Epoch 26/100\n",
      "50000/50000 - 3s - loss: 0.0230 - accuracy: 0.9908 - val_loss: 0.0428 - val_accuracy: 0.9860\n",
      "Epoch 27/100\n",
      "50000/50000 - 3s - loss: 0.0222 - accuracy: 0.9911 - val_loss: 0.0441 - val_accuracy: 0.9861\n",
      "Epoch 28/100\n",
      "50000/50000 - 3s - loss: 0.0220 - accuracy: 0.9910 - val_loss: 0.0454 - val_accuracy: 0.9856\n",
      "Epoch 29/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0219 - accuracy: 0.9912 - val_loss: 0.0455 - val_accuracy: 0.9859\n",
      "Epoch 00029: early stopping\n",
      "rho: 0.0, ll: 1.2818387746810913\n",
      "rho: 1e-05, ll: 1.2818211317062378\n",
      "rho: 5e-05, ll: 1.2817329168319702\n",
      "rho: 0.0001, ll: 1.2815861701965332\n",
      "rho: 0.0005, ll: 1.279172658920288\n",
      "rho: 0.001, ll: 1.2738195657730103\n",
      "rho: 0.005, ll: 1.1942930221557617\n",
      "rho: 0.01, ll: 1.0727452039718628\n",
      "rho: 0.05, ll: 0.3655622601509094\n",
      "rho: 0.1, ll: 0.0030684121884405613\n",
      "best_rho: 0.0, best_ll: 1.2818387746810913\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1196 - accuracy: 0.9545 - val_loss: 0.0668 - val_accuracy: 0.9734\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0718 - accuracy: 0.9724 - val_loss: 0.0571 - val_accuracy: 0.9772\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0631 - accuracy: 0.9757 - val_loss: 0.0534 - val_accuracy: 0.9792\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0569 - accuracy: 0.9782 - val_loss: 0.0485 - val_accuracy: 0.9806\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0532 - accuracy: 0.9795 - val_loss: 0.0462 - val_accuracy: 0.9821\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0494 - accuracy: 0.9809 - val_loss: 0.0456 - val_accuracy: 0.9823\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0463 - accuracy: 0.9820 - val_loss: 0.0428 - val_accuracy: 0.9835\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0437 - accuracy: 0.9830 - val_loss: 0.0419 - val_accuracy: 0.9838\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0420 - accuracy: 0.9836 - val_loss: 0.0419 - val_accuracy: 0.9841\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0396 - accuracy: 0.9843 - val_loss: 0.0414 - val_accuracy: 0.9841\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0379 - accuracy: 0.9851 - val_loss: 0.0417 - val_accuracy: 0.9838\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0363 - accuracy: 0.9857 - val_loss: 0.0396 - val_accuracy: 0.9850\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0351 - accuracy: 0.9860 - val_loss: 0.0406 - val_accuracy: 0.9850\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0335 - accuracy: 0.9868 - val_loss: 0.0399 - val_accuracy: 0.9851\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0324 - accuracy: 0.9871 - val_loss: 0.0405 - val_accuracy: 0.9850\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0311 - accuracy: 0.9875 - val_loss: 0.0414 - val_accuracy: 0.9854\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0300 - accuracy: 0.9880 - val_loss: 0.0415 - val_accuracy: 0.9855\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0292 - accuracy: 0.9884 - val_loss: 0.0448 - val_accuracy: 0.9839\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0278 - accuracy: 0.9888 - val_loss: 0.0409 - val_accuracy: 0.9855\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0279 - accuracy: 0.9889 - val_loss: 0.0407 - val_accuracy: 0.9855\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0273 - accuracy: 0.9891 - val_loss: 0.0419 - val_accuracy: 0.9856\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0260 - accuracy: 0.9896 - val_loss: 0.0423 - val_accuracy: 0.9855\n",
      "Epoch 00022: early stopping\n",
      "rho: 0.0, ll: 1.2948036193847656\n",
      "rho: 1e-05, ll: 1.2948163747787476\n",
      "rho: 5e-05, ll: 1.2948530912399292\n",
      "rho: 0.0001, ll: 1.2948671579360962\n",
      "rho: 0.0005, ll: 1.2938638925552368\n",
      "rho: 0.001, ll: 1.2904342412948608\n",
      "rho: 0.005, ll: 1.2247241735458374\n",
      "rho: 0.01, ll: 1.1136213541030884\n",
      "rho: 0.05, ll: 0.407029926776886\n",
      "rho: 0.1, ll: 0.02596547082066536\n",
      "best_rho: 0.0001, best_ll: 1.2948671579360962\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1317 - accuracy: 0.9497 - val_loss: 0.0672 - val_accuracy: 0.9736\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0787 - accuracy: 0.9699 - val_loss: 0.0584 - val_accuracy: 0.9768\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0695 - accuracy: 0.9733 - val_loss: 0.0521 - val_accuracy: 0.9795\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0624 - accuracy: 0.9760 - val_loss: 0.0488 - val_accuracy: 0.9809\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0588 - accuracy: 0.9773 - val_loss: 0.0466 - val_accuracy: 0.9819\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0547 - accuracy: 0.9785 - val_loss: 0.0447 - val_accuracy: 0.9825\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0524 - accuracy: 0.9793 - val_loss: 0.0432 - val_accuracy: 0.9831\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0498 - accuracy: 0.9808 - val_loss: 0.0425 - val_accuracy: 0.9837\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0475 - accuracy: 0.9816 - val_loss: 0.0417 - val_accuracy: 0.9838\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0459 - accuracy: 0.9822 - val_loss: 0.0416 - val_accuracy: 0.9835\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0435 - accuracy: 0.9832 - val_loss: 0.0402 - val_accuracy: 0.9847\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0421 - accuracy: 0.9833 - val_loss: 0.0386 - val_accuracy: 0.9852\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0399 - accuracy: 0.9844 - val_loss: 0.0388 - val_accuracy: 0.9852\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0393 - accuracy: 0.9846 - val_loss: 0.0408 - val_accuracy: 0.9845\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0378 - accuracy: 0.9849 - val_loss: 0.0392 - val_accuracy: 0.9855\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0365 - accuracy: 0.9857 - val_loss: 0.0382 - val_accuracy: 0.9855\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0359 - accuracy: 0.9858 - val_loss: 0.0390 - val_accuracy: 0.9854\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0345 - accuracy: 0.9864 - val_loss: 0.0406 - val_accuracy: 0.9847\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0337 - accuracy: 0.9866 - val_loss: 0.0391 - val_accuracy: 0.9860\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0329 - accuracy: 0.9868 - val_loss: 0.0401 - val_accuracy: 0.9853\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0315 - accuracy: 0.9874 - val_loss: 0.0384 - val_accuracy: 0.9856\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0310 - accuracy: 0.9876 - val_loss: 0.0387 - val_accuracy: 0.9859\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0306 - accuracy: 0.9878 - val_loss: 0.0389 - val_accuracy: 0.9856\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0301 - accuracy: 0.9878 - val_loss: 0.0379 - val_accuracy: 0.9860\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0292 - accuracy: 0.9882 - val_loss: 0.0393 - val_accuracy: 0.9859\n",
      "Epoch 26/100\n",
      "50000/50000 - 3s - loss: 0.0288 - accuracy: 0.9884 - val_loss: 0.0401 - val_accuracy: 0.9858\n",
      "Epoch 27/100\n",
      "50000/50000 - 3s - loss: 0.0280 - accuracy: 0.9886 - val_loss: 0.0410 - val_accuracy: 0.9859\n",
      "Epoch 28/100\n",
      "50000/50000 - 3s - loss: 0.0274 - accuracy: 0.9890 - val_loss: 0.0410 - val_accuracy: 0.9862\n",
      "Epoch 29/100\n",
      "50000/50000 - 3s - loss: 0.0273 - accuracy: 0.9891 - val_loss: 0.0405 - val_accuracy: 0.9859\n",
      "Epoch 30/100\n",
      "50000/50000 - 3s - loss: 0.0266 - accuracy: 0.9891 - val_loss: 0.0418 - val_accuracy: 0.9863\n",
      "Epoch 31/100\n",
      "50000/50000 - 3s - loss: 0.0258 - accuracy: 0.9896 - val_loss: 0.0425 - val_accuracy: 0.9854\n",
      "Epoch 32/100\n",
      "50000/50000 - 3s - loss: 0.0253 - accuracy: 0.9898 - val_loss: 0.0438 - val_accuracy: 0.9857\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 33/100\n",
      "50000/50000 - 3s - loss: 0.0247 - accuracy: 0.9899 - val_loss: 0.0430 - val_accuracy: 0.9865\n",
      "Epoch 34/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0247 - accuracy: 0.9900 - val_loss: 0.0429 - val_accuracy: 0.9862\n",
      "Epoch 00034: early stopping\n",
      "rho: 0.0, ll: 1.1629719734191895\n",
      "rho: 1e-05, ll: 1.1628974676132202\n",
      "rho: 5e-05, ll: 1.1625765562057495\n",
      "rho: 0.0001, ll: 1.1621243953704834\n",
      "rho: 0.0005, ll: 1.1568537950515747\n",
      "rho: 0.001, ll: 1.1474125385284424\n",
      "rho: 0.005, ll: 1.0412272214889526\n",
      "rho: 0.01, ll: 0.907244086265564\n",
      "rho: 0.05, ll: 0.26197701692581177\n",
      "rho: 0.1, ll: -0.047956641763448715\n",
      "best_rho: 0.0, best_ll: 1.1629719734191895\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.1191 - accuracy: 0.9541 - val_loss: 0.0663 - val_accuracy: 0.9740\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.0727 - accuracy: 0.9720 - val_loss: 0.0562 - val_accuracy: 0.9782\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.0634 - accuracy: 0.9755 - val_loss: 0.0535 - val_accuracy: 0.9789\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.0580 - accuracy: 0.9774 - val_loss: 0.0485 - val_accuracy: 0.9809\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.0538 - accuracy: 0.9792 - val_loss: 0.0464 - val_accuracy: 0.9819\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.0504 - accuracy: 0.9804 - val_loss: 0.0442 - val_accuracy: 0.9829\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.0479 - accuracy: 0.9813 - val_loss: 0.0429 - val_accuracy: 0.9834\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.0452 - accuracy: 0.9824 - val_loss: 0.0424 - val_accuracy: 0.9831\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.0432 - accuracy: 0.9830 - val_loss: 0.0419 - val_accuracy: 0.9838\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.0416 - accuracy: 0.9837 - val_loss: 0.0409 - val_accuracy: 0.9846\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.0394 - accuracy: 0.9842 - val_loss: 0.0397 - val_accuracy: 0.9849\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.0382 - accuracy: 0.9849 - val_loss: 0.0399 - val_accuracy: 0.9852\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.0365 - accuracy: 0.9854 - val_loss: 0.0430 - val_accuracy: 0.9842\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.0351 - accuracy: 0.9861 - val_loss: 0.0405 - val_accuracy: 0.9850\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.0341 - accuracy: 0.9864 - val_loss: 0.0406 - val_accuracy: 0.9850\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.0328 - accuracy: 0.9870 - val_loss: 0.0396 - val_accuracy: 0.9858\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.0321 - accuracy: 0.9871 - val_loss: 0.0414 - val_accuracy: 0.9851\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0307 - accuracy: 0.9876 - val_loss: 0.0397 - val_accuracy: 0.9858\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0301 - accuracy: 0.9880 - val_loss: 0.0418 - val_accuracy: 0.9849\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0286 - accuracy: 0.9885 - val_loss: 0.0419 - val_accuracy: 0.9855\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0283 - accuracy: 0.9887 - val_loss: 0.0403 - val_accuracy: 0.9857\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0281 - accuracy: 0.9885 - val_loss: 0.0410 - val_accuracy: 0.9854\n",
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.0270 - accuracy: 0.9892 - val_loss: 0.0418 - val_accuracy: 0.9856\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.0260 - accuracy: 0.9894 - val_loss: 0.0428 - val_accuracy: 0.9854\n",
      "Epoch 25/100\n",
      "50000/50000 - 3s - loss: 0.0250 - accuracy: 0.9899 - val_loss: 0.0444 - val_accuracy: 0.9853\n",
      "Epoch 26/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0245 - accuracy: 0.9901 - val_loss: 0.0442 - val_accuracy: 0.9856\n",
      "Epoch 00026: early stopping\n",
      "rho: 0.0, ll: 1.2887239456176758\n",
      "rho: 1e-05, ll: 1.2886874675750732\n",
      "rho: 5e-05, ll: 1.288527011871338\n",
      "rho: 0.0001, ll: 1.2882939577102661\n",
      "rho: 0.0005, ll: 1.2853211164474487\n",
      "rho: 0.001, ll: 1.279475212097168\n",
      "rho: 0.005, ll: 1.1985288858413696\n",
      "rho: 0.01, ll: 1.0773169994354248\n",
      "rho: 0.05, ll: 0.38064026832580566\n",
      "rho: 0.1, ll: 0.018676452338695526\n",
      "best_rho: 0.0, best_ll: 1.2887239456176758\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5430 - accuracy: 0.8086 - val_loss: 0.3174 - val_accuracy: 0.8836\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3489 - accuracy: 0.8746 - val_loss: 0.2699 - val_accuracy: 0.9021\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3014 - accuracy: 0.8923 - val_loss: 0.2516 - val_accuracy: 0.9084\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2686 - accuracy: 0.9032 - val_loss: 0.2306 - val_accuracy: 0.9148\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2467 - accuracy: 0.9086 - val_loss: 0.2241 - val_accuracy: 0.9171\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2230 - accuracy: 0.9184 - val_loss: 0.2210 - val_accuracy: 0.9214\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2059 - accuracy: 0.9242 - val_loss: 0.2133 - val_accuracy: 0.9242\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1914 - accuracy: 0.9273 - val_loss: 0.2039 - val_accuracy: 0.9272\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1762 - accuracy: 0.9343 - val_loss: 0.2013 - val_accuracy: 0.9285\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1638 - accuracy: 0.9396 - val_loss: 0.2061 - val_accuracy: 0.9290\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1564 - accuracy: 0.9409 - val_loss: 0.2046 - val_accuracy: 0.9289\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1411 - accuracy: 0.9466 - val_loss: 0.2045 - val_accuracy: 0.9292\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1367 - accuracy: 0.9485 - val_loss: 0.2083 - val_accuracy: 0.9298\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1267 - accuracy: 0.9522 - val_loss: 0.2205 - val_accuracy: 0.9302\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1211 - accuracy: 0.9540 - val_loss: 0.2337 - val_accuracy: 0.9280\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1151 - accuracy: 0.9564 - val_loss: 0.2315 - val_accuracy: 0.9265\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1109 - accuracy: 0.9579 - val_loss: 0.2199 - val_accuracy: 0.9280\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1057 - accuracy: 0.9604 - val_loss: 0.2238 - val_accuracy: 0.9305\n",
      "Epoch 19/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0972 - accuracy: 0.9625 - val_loss: 0.2319 - val_accuracy: 0.9324\n",
      "Epoch 00019: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5713 - accuracy: 0.7980 - val_loss: 0.3384 - val_accuracy: 0.8765\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3717 - accuracy: 0.8676 - val_loss: 0.2808 - val_accuracy: 0.8966\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3165 - accuracy: 0.8860 - val_loss: 0.2604 - val_accuracy: 0.9018\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2813 - accuracy: 0.8982 - val_loss: 0.2346 - val_accuracy: 0.9153\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2519 - accuracy: 0.9079 - val_loss: 0.2290 - val_accuracy: 0.9173\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2363 - accuracy: 0.9143 - val_loss: 0.2244 - val_accuracy: 0.9190\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2170 - accuracy: 0.9197 - val_loss: 0.2174 - val_accuracy: 0.9209\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2027 - accuracy: 0.9249 - val_loss: 0.2087 - val_accuracy: 0.9254\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1912 - accuracy: 0.9297 - val_loss: 0.2080 - val_accuracy: 0.9254\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1789 - accuracy: 0.9331 - val_loss: 0.2076 - val_accuracy: 0.9277\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1678 - accuracy: 0.9378 - val_loss: 0.2095 - val_accuracy: 0.9279\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1559 - accuracy: 0.9406 - val_loss: 0.2016 - val_accuracy: 0.9298\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1458 - accuracy: 0.9443 - val_loss: 0.2057 - val_accuracy: 0.9285\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1394 - accuracy: 0.9481 - val_loss: 0.2145 - val_accuracy: 0.9294\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1307 - accuracy: 0.9505 - val_loss: 0.2206 - val_accuracy: 0.9295\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1273 - accuracy: 0.9522 - val_loss: 0.2163 - val_accuracy: 0.9287\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1188 - accuracy: 0.9551 - val_loss: 0.2182 - val_accuracy: 0.9312\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1092 - accuracy: 0.9575 - val_loss: 0.2308 - val_accuracy: 0.9300\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1103 - accuracy: 0.9582 - val_loss: 0.2279 - val_accuracy: 0.9291\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.1004 - accuracy: 0.9606 - val_loss: 0.2298 - val_accuracy: 0.9306\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.1019 - accuracy: 0.9612 - val_loss: 0.2366 - val_accuracy: 0.9321\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0973 - accuracy: 0.9621 - val_loss: 0.2464 - val_accuracy: 0.9301\n",
      "Epoch 00022: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5505 - accuracy: 0.8040 - val_loss: 0.3414 - val_accuracy: 0.8741\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3611 - accuracy: 0.8719 - val_loss: 0.2794 - val_accuracy: 0.8967\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3107 - accuracy: 0.8885 - val_loss: 0.2571 - val_accuracy: 0.9071\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2783 - accuracy: 0.8988 - val_loss: 0.2400 - val_accuracy: 0.9091\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2482 - accuracy: 0.9092 - val_loss: 0.2234 - val_accuracy: 0.9185\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2302 - accuracy: 0.9158 - val_loss: 0.2289 - val_accuracy: 0.9194\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2137 - accuracy: 0.9210 - val_loss: 0.2174 - val_accuracy: 0.9209\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1986 - accuracy: 0.9271 - val_loss: 0.2077 - val_accuracy: 0.9265\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1853 - accuracy: 0.9313 - val_loss: 0.2115 - val_accuracy: 0.9249\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1688 - accuracy: 0.9379 - val_loss: 0.2109 - val_accuracy: 0.9256\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1593 - accuracy: 0.9407 - val_loss: 0.2121 - val_accuracy: 0.9267\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1500 - accuracy: 0.9429 - val_loss: 0.2007 - val_accuracy: 0.9297\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1404 - accuracy: 0.9469 - val_loss: 0.2162 - val_accuracy: 0.9273\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1345 - accuracy: 0.9488 - val_loss: 0.2164 - val_accuracy: 0.9289\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1269 - accuracy: 0.9526 - val_loss: 0.2101 - val_accuracy: 0.9298\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1199 - accuracy: 0.9544 - val_loss: 0.2148 - val_accuracy: 0.9302\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1125 - accuracy: 0.9569 - val_loss: 0.2279 - val_accuracy: 0.9306\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1088 - accuracy: 0.9591 - val_loss: 0.2198 - val_accuracy: 0.9312\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1040 - accuracy: 0.9606 - val_loss: 0.2285 - val_accuracy: 0.9301\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0953 - accuracy: 0.9631 - val_loss: 0.2265 - val_accuracy: 0.9312\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0907 - accuracy: 0.9654 - val_loss: 0.2365 - val_accuracy: 0.9323\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0924 - accuracy: 0.9650 - val_loss: 0.2347 - val_accuracy: 0.9313\n",
      "Epoch 00022: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5702 - accuracy: 0.8009 - val_loss: 0.3483 - val_accuracy: 0.8783\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3717 - accuracy: 0.8688 - val_loss: 0.2973 - val_accuracy: 0.8927\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3165 - accuracy: 0.8859 - val_loss: 0.2575 - val_accuracy: 0.9073\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2839 - accuracy: 0.8975 - val_loss: 0.2409 - val_accuracy: 0.9118\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2603 - accuracy: 0.9066 - val_loss: 0.2310 - val_accuracy: 0.9188\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2361 - accuracy: 0.9148 - val_loss: 0.2220 - val_accuracy: 0.9208\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2208 - accuracy: 0.9180 - val_loss: 0.2145 - val_accuracy: 0.9222\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2031 - accuracy: 0.9245 - val_loss: 0.2194 - val_accuracy: 0.9222\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1920 - accuracy: 0.9287 - val_loss: 0.2055 - val_accuracy: 0.9268\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1786 - accuracy: 0.9345 - val_loss: 0.2024 - val_accuracy: 0.9289\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1689 - accuracy: 0.9367 - val_loss: 0.2104 - val_accuracy: 0.9255\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1590 - accuracy: 0.9399 - val_loss: 0.2107 - val_accuracy: 0.9252\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1502 - accuracy: 0.9427 - val_loss: 0.2100 - val_accuracy: 0.9302\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1394 - accuracy: 0.9475 - val_loss: 0.2125 - val_accuracy: 0.9304\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1332 - accuracy: 0.9486 - val_loss: 0.2143 - val_accuracy: 0.9286\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1234 - accuracy: 0.9521 - val_loss: 0.2190 - val_accuracy: 0.9274\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1218 - accuracy: 0.9538 - val_loss: 0.2062 - val_accuracy: 0.9327\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1175 - accuracy: 0.9557 - val_loss: 0.2200 - val_accuracy: 0.9306\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1083 - accuracy: 0.9590 - val_loss: 0.2416 - val_accuracy: 0.9231\n",
      "Epoch 20/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.1024 - accuracy: 0.9602 - val_loss: 0.2379 - val_accuracy: 0.9301\n",
      "Epoch 00020: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5712 - accuracy: 0.7987 - val_loss: 0.3430 - val_accuracy: 0.8751\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3708 - accuracy: 0.8667 - val_loss: 0.2806 - val_accuracy: 0.8956\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3151 - accuracy: 0.8883 - val_loss: 0.2598 - val_accuracy: 0.9047\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2838 - accuracy: 0.8986 - val_loss: 0.2405 - val_accuracy: 0.9105\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2553 - accuracy: 0.9082 - val_loss: 0.2197 - val_accuracy: 0.9201\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2345 - accuracy: 0.9144 - val_loss: 0.2136 - val_accuracy: 0.9236\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2159 - accuracy: 0.9196 - val_loss: 0.2054 - val_accuracy: 0.9244\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2006 - accuracy: 0.9266 - val_loss: 0.2014 - val_accuracy: 0.9273\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1859 - accuracy: 0.9322 - val_loss: 0.1999 - val_accuracy: 0.9274\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1723 - accuracy: 0.9353 - val_loss: 0.1954 - val_accuracy: 0.9295\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1619 - accuracy: 0.9389 - val_loss: 0.2052 - val_accuracy: 0.9268\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1530 - accuracy: 0.9416 - val_loss: 0.2020 - val_accuracy: 0.9292\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1445 - accuracy: 0.9443 - val_loss: 0.2040 - val_accuracy: 0.9312\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1335 - accuracy: 0.9491 - val_loss: 0.2038 - val_accuracy: 0.9315\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1244 - accuracy: 0.9534 - val_loss: 0.2016 - val_accuracy: 0.9336\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1224 - accuracy: 0.9522 - val_loss: 0.2029 - val_accuracy: 0.9339\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1147 - accuracy: 0.9563 - val_loss: 0.2107 - val_accuracy: 0.9344\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1123 - accuracy: 0.9560 - val_loss: 0.2015 - val_accuracy: 0.9358\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1036 - accuracy: 0.9613 - val_loss: 0.2103 - val_accuracy: 0.9351\n",
      "Epoch 20/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.1034 - accuracy: 0.9606 - val_loss: 0.2149 - val_accuracy: 0.9298\n",
      "Epoch 00020: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5378 - accuracy: 0.8105 - val_loss: 0.3436 - val_accuracy: 0.8759\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3440 - accuracy: 0.8751 - val_loss: 0.2906 - val_accuracy: 0.8964\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.2982 - accuracy: 0.8924 - val_loss: 0.2758 - val_accuracy: 0.8993\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2653 - accuracy: 0.9039 - val_loss: 0.2541 - val_accuracy: 0.9077\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2389 - accuracy: 0.9119 - val_loss: 0.2403 - val_accuracy: 0.9145\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2214 - accuracy: 0.9186 - val_loss: 0.2306 - val_accuracy: 0.9156\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2040 - accuracy: 0.9257 - val_loss: 0.2271 - val_accuracy: 0.9206\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1901 - accuracy: 0.9296 - val_loss: 0.2276 - val_accuracy: 0.9179\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1766 - accuracy: 0.9341 - val_loss: 0.2181 - val_accuracy: 0.9227\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1646 - accuracy: 0.9377 - val_loss: 0.2223 - val_accuracy: 0.9232\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1514 - accuracy: 0.9426 - val_loss: 0.2214 - val_accuracy: 0.9244\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1432 - accuracy: 0.9457 - val_loss: 0.2227 - val_accuracy: 0.9234\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1333 - accuracy: 0.9496 - val_loss: 0.2278 - val_accuracy: 0.9246\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1262 - accuracy: 0.9518 - val_loss: 0.2231 - val_accuracy: 0.9287\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1171 - accuracy: 0.9549 - val_loss: 0.2229 - val_accuracy: 0.9279\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1121 - accuracy: 0.9575 - val_loss: 0.2375 - val_accuracy: 0.9255\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1063 - accuracy: 0.9604 - val_loss: 0.2383 - val_accuracy: 0.9299\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1010 - accuracy: 0.9605 - val_loss: 0.2444 - val_accuracy: 0.9285\n",
      "Epoch 19/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0951 - accuracy: 0.9635 - val_loss: 0.2483 - val_accuracy: 0.9254\n",
      "Epoch 00019: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5376 - accuracy: 0.8095 - val_loss: 0.3373 - val_accuracy: 0.8760\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3573 - accuracy: 0.8740 - val_loss: 0.2900 - val_accuracy: 0.8949\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3087 - accuracy: 0.8896 - val_loss: 0.2610 - val_accuracy: 0.9034\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2731 - accuracy: 0.9009 - val_loss: 0.2561 - val_accuracy: 0.9043\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2492 - accuracy: 0.9091 - val_loss: 0.2312 - val_accuracy: 0.9154\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2288 - accuracy: 0.9158 - val_loss: 0.2342 - val_accuracy: 0.9140\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2135 - accuracy: 0.9221 - val_loss: 0.2299 - val_accuracy: 0.9182\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1999 - accuracy: 0.9265 - val_loss: 0.2307 - val_accuracy: 0.9149\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1832 - accuracy: 0.9321 - val_loss: 0.2186 - val_accuracy: 0.9214\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1732 - accuracy: 0.9352 - val_loss: 0.2218 - val_accuracy: 0.9204\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1586 - accuracy: 0.9394 - val_loss: 0.2258 - val_accuracy: 0.9255\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1538 - accuracy: 0.9409 - val_loss: 0.2122 - val_accuracy: 0.9286\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1410 - accuracy: 0.9467 - val_loss: 0.2125 - val_accuracy: 0.9275\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1313 - accuracy: 0.9494 - val_loss: 0.2269 - val_accuracy: 0.9254\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1254 - accuracy: 0.9528 - val_loss: 0.2238 - val_accuracy: 0.9259\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1199 - accuracy: 0.9551 - val_loss: 0.2339 - val_accuracy: 0.9259\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1143 - accuracy: 0.9563 - val_loss: 0.2281 - val_accuracy: 0.9246\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1071 - accuracy: 0.9590 - val_loss: 0.2348 - val_accuracy: 0.9271\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1008 - accuracy: 0.9611 - val_loss: 0.2332 - val_accuracy: 0.9293\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0983 - accuracy: 0.9620 - val_loss: 0.2444 - val_accuracy: 0.9271\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0927 - accuracy: 0.9651 - val_loss: 0.2441 - val_accuracy: 0.9291\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0909 - accuracy: 0.9653 - val_loss: 0.2556 - val_accuracy: 0.9275\n",
      "Epoch 00022: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5542 - accuracy: 0.8064 - val_loss: 0.3564 - val_accuracy: 0.8724\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3643 - accuracy: 0.8711 - val_loss: 0.2998 - val_accuracy: 0.8917\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3098 - accuracy: 0.8891 - val_loss: 0.2843 - val_accuracy: 0.8967\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2744 - accuracy: 0.9009 - val_loss: 0.2593 - val_accuracy: 0.9051\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2560 - accuracy: 0.9069 - val_loss: 0.2428 - val_accuracy: 0.9139\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2342 - accuracy: 0.9143 - val_loss: 0.2346 - val_accuracy: 0.9152\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2181 - accuracy: 0.9186 - val_loss: 0.2300 - val_accuracy: 0.9210\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2048 - accuracy: 0.9241 - val_loss: 0.2324 - val_accuracy: 0.9172\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1895 - accuracy: 0.9301 - val_loss: 0.2222 - val_accuracy: 0.9229\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1777 - accuracy: 0.9345 - val_loss: 0.2279 - val_accuracy: 0.9208\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1618 - accuracy: 0.9397 - val_loss: 0.2197 - val_accuracy: 0.9236\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1545 - accuracy: 0.9415 - val_loss: 0.2267 - val_accuracy: 0.9250\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1449 - accuracy: 0.9451 - val_loss: 0.2421 - val_accuracy: 0.9250\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1364 - accuracy: 0.9474 - val_loss: 0.2279 - val_accuracy: 0.9244\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1304 - accuracy: 0.9499 - val_loss: 0.2415 - val_accuracy: 0.9245\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1235 - accuracy: 0.9539 - val_loss: 0.2380 - val_accuracy: 0.9260\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1196 - accuracy: 0.9542 - val_loss: 0.2399 - val_accuracy: 0.9256\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1118 - accuracy: 0.9570 - val_loss: 0.2437 - val_accuracy: 0.9254\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1074 - accuracy: 0.9583 - val_loss: 0.2620 - val_accuracy: 0.9264\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.1027 - accuracy: 0.9601 - val_loss: 0.2625 - val_accuracy: 0.9259\n",
      "Epoch 21/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0944 - accuracy: 0.9633 - val_loss: 0.2510 - val_accuracy: 0.9273\n",
      "Epoch 00021: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5607 - accuracy: 0.8028 - val_loss: 0.3303 - val_accuracy: 0.8830\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3712 - accuracy: 0.8683 - val_loss: 0.2874 - val_accuracy: 0.8974\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3189 - accuracy: 0.8847 - val_loss: 0.2528 - val_accuracy: 0.9109\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2855 - accuracy: 0.8976 - val_loss: 0.2399 - val_accuracy: 0.9128\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2559 - accuracy: 0.9065 - val_loss: 0.2371 - val_accuracy: 0.9154\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2388 - accuracy: 0.9122 - val_loss: 0.2197 - val_accuracy: 0.9221\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2196 - accuracy: 0.9189 - val_loss: 0.2197 - val_accuracy: 0.9230\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2042 - accuracy: 0.9246 - val_loss: 0.2065 - val_accuracy: 0.9267\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1893 - accuracy: 0.9287 - val_loss: 0.2185 - val_accuracy: 0.9239\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1793 - accuracy: 0.9335 - val_loss: 0.2061 - val_accuracy: 0.9308\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1671 - accuracy: 0.9363 - val_loss: 0.2096 - val_accuracy: 0.9319\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1565 - accuracy: 0.9413 - val_loss: 0.2087 - val_accuracy: 0.9300\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1483 - accuracy: 0.9429 - val_loss: 0.2116 - val_accuracy: 0.9313\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1388 - accuracy: 0.9473 - val_loss: 0.2092 - val_accuracy: 0.9314\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1301 - accuracy: 0.9505 - val_loss: 0.2094 - val_accuracy: 0.9301\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1256 - accuracy: 0.9524 - val_loss: 0.2135 - val_accuracy: 0.9309\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1194 - accuracy: 0.9538 - val_loss: 0.2231 - val_accuracy: 0.9331\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1117 - accuracy: 0.9576 - val_loss: 0.2251 - val_accuracy: 0.9322\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1091 - accuracy: 0.9585 - val_loss: 0.2305 - val_accuracy: 0.9324\n",
      "Epoch 20/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.1044 - accuracy: 0.9596 - val_loss: 0.2306 - val_accuracy: 0.9331\n",
      "Epoch 00020: early stopping\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5616 - accuracy: 0.8009 - val_loss: 0.3411 - val_accuracy: 0.8790\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3612 - accuracy: 0.8720 - val_loss: 0.2845 - val_accuracy: 0.8950\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3050 - accuracy: 0.8909 - val_loss: 0.2709 - val_accuracy: 0.8990\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2756 - accuracy: 0.9003 - val_loss: 0.2453 - val_accuracy: 0.9103\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2469 - accuracy: 0.9102 - val_loss: 0.2428 - val_accuracy: 0.9147\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2279 - accuracy: 0.9161 - val_loss: 0.2288 - val_accuracy: 0.9166\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2100 - accuracy: 0.9222 - val_loss: 0.2253 - val_accuracy: 0.9190\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1962 - accuracy: 0.9260 - val_loss: 0.2242 - val_accuracy: 0.9207\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1818 - accuracy: 0.9338 - val_loss: 0.2183 - val_accuracy: 0.9241\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1674 - accuracy: 0.9373 - val_loss: 0.2212 - val_accuracy: 0.9220\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1587 - accuracy: 0.9395 - val_loss: 0.2244 - val_accuracy: 0.9222\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1496 - accuracy: 0.9440 - val_loss: 0.2124 - val_accuracy: 0.9261\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1396 - accuracy: 0.9470 - val_loss: 0.2191 - val_accuracy: 0.9265\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1318 - accuracy: 0.9510 - val_loss: 0.2329 - val_accuracy: 0.9239\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1270 - accuracy: 0.9516 - val_loss: 0.2204 - val_accuracy: 0.9289\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1154 - accuracy: 0.9575 - val_loss: 0.2243 - val_accuracy: 0.9269\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1124 - accuracy: 0.9576 - val_loss: 0.2331 - val_accuracy: 0.9265\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1039 - accuracy: 0.9597 - val_loss: 0.2417 - val_accuracy: 0.9249\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1007 - accuracy: 0.9615 - val_loss: 0.2376 - val_accuracy: 0.9306\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0953 - accuracy: 0.9637 - val_loss: 0.2461 - val_accuracy: 0.9266\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0931 - accuracy: 0.9642 - val_loss: 0.2392 - val_accuracy: 0.9300\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0868 - accuracy: 0.9658 - val_loss: 0.2553 - val_accuracy: 0.9316\n",
      "Epoch 00022: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5849 - accuracy: 0.7900 - val_loss: 0.3390 - val_accuracy: 0.8791\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3767 - accuracy: 0.8659 - val_loss: 0.2886 - val_accuracy: 0.8945\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3260 - accuracy: 0.8831 - val_loss: 0.2640 - val_accuracy: 0.9057\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2939 - accuracy: 0.8947 - val_loss: 0.2439 - val_accuracy: 0.9093\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2694 - accuracy: 0.9023 - val_loss: 0.2345 - val_accuracy: 0.9100\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2480 - accuracy: 0.9107 - val_loss: 0.2299 - val_accuracy: 0.9168\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2303 - accuracy: 0.9152 - val_loss: 0.2194 - val_accuracy: 0.9176\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2146 - accuracy: 0.9211 - val_loss: 0.2171 - val_accuracy: 0.9198\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.2037 - accuracy: 0.9251 - val_loss: 0.2093 - val_accuracy: 0.9225\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1904 - accuracy: 0.9298 - val_loss: 0.2142 - val_accuracy: 0.9198\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1809 - accuracy: 0.9330 - val_loss: 0.2056 - val_accuracy: 0.9251\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1706 - accuracy: 0.9369 - val_loss: 0.2130 - val_accuracy: 0.9254\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1603 - accuracy: 0.9395 - val_loss: 0.2137 - val_accuracy: 0.9242\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1524 - accuracy: 0.9415 - val_loss: 0.2113 - val_accuracy: 0.9259\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1425 - accuracy: 0.9456 - val_loss: 0.2085 - val_accuracy: 0.9274\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1327 - accuracy: 0.9498 - val_loss: 0.2224 - val_accuracy: 0.9285\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1308 - accuracy: 0.9501 - val_loss: 0.2133 - val_accuracy: 0.9271\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1228 - accuracy: 0.9534 - val_loss: 0.2217 - val_accuracy: 0.9284\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1169 - accuracy: 0.9559 - val_loss: 0.2199 - val_accuracy: 0.9287\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.1143 - accuracy: 0.9564 - val_loss: 0.2157 - val_accuracy: 0.9268\n",
      "Epoch 21/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.1081 - accuracy: 0.9587 - val_loss: 0.2338 - val_accuracy: 0.9279\n",
      "Epoch 00021: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5615 - accuracy: 0.8025 - val_loss: 0.3372 - val_accuracy: 0.8759\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3597 - accuracy: 0.8707 - val_loss: 0.2841 - val_accuracy: 0.8945\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3104 - accuracy: 0.8875 - val_loss: 0.2518 - val_accuracy: 0.9087\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2742 - accuracy: 0.8993 - val_loss: 0.2436 - val_accuracy: 0.9123\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2506 - accuracy: 0.9090 - val_loss: 0.2380 - val_accuracy: 0.9128\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2262 - accuracy: 0.9165 - val_loss: 0.2250 - val_accuracy: 0.9178\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2128 - accuracy: 0.9211 - val_loss: 0.2140 - val_accuracy: 0.9209\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1961 - accuracy: 0.9271 - val_loss: 0.2139 - val_accuracy: 0.9244\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1811 - accuracy: 0.9321 - val_loss: 0.2200 - val_accuracy: 0.9241\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1706 - accuracy: 0.9363 - val_loss: 0.2130 - val_accuracy: 0.9248\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1592 - accuracy: 0.9411 - val_loss: 0.2279 - val_accuracy: 0.9243\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1485 - accuracy: 0.9446 - val_loss: 0.2150 - val_accuracy: 0.9248\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1408 - accuracy: 0.9457 - val_loss: 0.2198 - val_accuracy: 0.9267\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1333 - accuracy: 0.9504 - val_loss: 0.2176 - val_accuracy: 0.9293\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1254 - accuracy: 0.9523 - val_loss: 0.2246 - val_accuracy: 0.9263\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1167 - accuracy: 0.9555 - val_loss: 0.2271 - val_accuracy: 0.9251\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1125 - accuracy: 0.9581 - val_loss: 0.2378 - val_accuracy: 0.9262\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1092 - accuracy: 0.9582 - val_loss: 0.2457 - val_accuracy: 0.9297\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1075 - accuracy: 0.9587 - val_loss: 0.2538 - val_accuracy: 0.9271\n",
      "Epoch 20/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0967 - accuracy: 0.9624 - val_loss: 0.2388 - val_accuracy: 0.9292\n",
      "Epoch 00020: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5629 - accuracy: 0.8002 - val_loss: 0.3646 - val_accuracy: 0.8685\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3687 - accuracy: 0.8687 - val_loss: 0.3118 - val_accuracy: 0.8856\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3193 - accuracy: 0.8858 - val_loss: 0.2837 - val_accuracy: 0.8953\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2863 - accuracy: 0.8968 - val_loss: 0.2613 - val_accuracy: 0.9072\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2633 - accuracy: 0.9033 - val_loss: 0.2589 - val_accuracy: 0.9065\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2382 - accuracy: 0.9122 - val_loss: 0.2475 - val_accuracy: 0.9110\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2218 - accuracy: 0.9181 - val_loss: 0.2374 - val_accuracy: 0.9171\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2070 - accuracy: 0.9229 - val_loss: 0.2371 - val_accuracy: 0.9186\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1917 - accuracy: 0.9296 - val_loss: 0.2356 - val_accuracy: 0.9165\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1803 - accuracy: 0.9324 - val_loss: 0.2323 - val_accuracy: 0.9220\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1711 - accuracy: 0.9350 - val_loss: 0.2306 - val_accuracy: 0.9228\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1590 - accuracy: 0.9395 - val_loss: 0.2341 - val_accuracy: 0.9230\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1544 - accuracy: 0.9420 - val_loss: 0.2213 - val_accuracy: 0.9267\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1442 - accuracy: 0.9446 - val_loss: 0.2380 - val_accuracy: 0.9230\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1373 - accuracy: 0.9487 - val_loss: 0.2353 - val_accuracy: 0.9243\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1298 - accuracy: 0.9502 - val_loss: 0.2405 - val_accuracy: 0.9244\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1229 - accuracy: 0.9536 - val_loss: 0.2593 - val_accuracy: 0.9249\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1186 - accuracy: 0.9550 - val_loss: 0.2376 - val_accuracy: 0.9263\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1096 - accuracy: 0.9583 - val_loss: 0.2456 - val_accuracy: 0.9264\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.1081 - accuracy: 0.9579 - val_loss: 0.2586 - val_accuracy: 0.9239\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.1017 - accuracy: 0.9604 - val_loss: 0.2591 - val_accuracy: 0.9291\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0963 - accuracy: 0.9632 - val_loss: 0.2669 - val_accuracy: 0.9281\n",
      "Epoch 23/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0966 - accuracy: 0.9625 - val_loss: 0.2471 - val_accuracy: 0.9304\n",
      "Epoch 00023: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5483 - accuracy: 0.8056 - val_loss: 0.3333 - val_accuracy: 0.8823\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3567 - accuracy: 0.8724 - val_loss: 0.2826 - val_accuracy: 0.8996\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3054 - accuracy: 0.8899 - val_loss: 0.2620 - val_accuracy: 0.9030\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2744 - accuracy: 0.8996 - val_loss: 0.2501 - val_accuracy: 0.9089\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2457 - accuracy: 0.9110 - val_loss: 0.2362 - val_accuracy: 0.9173\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2294 - accuracy: 0.9136 - val_loss: 0.2282 - val_accuracy: 0.9215\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2094 - accuracy: 0.9231 - val_loss: 0.2191 - val_accuracy: 0.9233\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1972 - accuracy: 0.9276 - val_loss: 0.2202 - val_accuracy: 0.9215\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1837 - accuracy: 0.9315 - val_loss: 0.2143 - val_accuracy: 0.9275\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1662 - accuracy: 0.9374 - val_loss: 0.2143 - val_accuracy: 0.9276\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1597 - accuracy: 0.9396 - val_loss: 0.2122 - val_accuracy: 0.9297\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1486 - accuracy: 0.9443 - val_loss: 0.2029 - val_accuracy: 0.9325\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1400 - accuracy: 0.9467 - val_loss: 0.2201 - val_accuracy: 0.9285\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1295 - accuracy: 0.9501 - val_loss: 0.2285 - val_accuracy: 0.9292\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1284 - accuracy: 0.9510 - val_loss: 0.2175 - val_accuracy: 0.9290\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1177 - accuracy: 0.9548 - val_loss: 0.2286 - val_accuracy: 0.9323\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1135 - accuracy: 0.9562 - val_loss: 0.2364 - val_accuracy: 0.9300\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1058 - accuracy: 0.9589 - val_loss: 0.2333 - val_accuracy: 0.9325\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1005 - accuracy: 0.9608 - val_loss: 0.2466 - val_accuracy: 0.9331\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0973 - accuracy: 0.9624 - val_loss: 0.2379 - val_accuracy: 0.9332\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0926 - accuracy: 0.9635 - val_loss: 0.2356 - val_accuracy: 0.9305\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0889 - accuracy: 0.9660 - val_loss: 0.2525 - val_accuracy: 0.9292\n",
      "Epoch 00022: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5451 - accuracy: 0.8040 - val_loss: 0.3197 - val_accuracy: 0.8823\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3565 - accuracy: 0.8715 - val_loss: 0.2772 - val_accuracy: 0.8972\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3060 - accuracy: 0.8898 - val_loss: 0.2464 - val_accuracy: 0.9102\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2753 - accuracy: 0.8998 - val_loss: 0.2436 - val_accuracy: 0.9094\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2496 - accuracy: 0.9082 - val_loss: 0.2228 - val_accuracy: 0.9185\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2307 - accuracy: 0.9159 - val_loss: 0.2218 - val_accuracy: 0.9211\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2158 - accuracy: 0.9192 - val_loss: 0.2100 - val_accuracy: 0.9249\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1992 - accuracy: 0.9256 - val_loss: 0.2049 - val_accuracy: 0.9249\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1842 - accuracy: 0.9321 - val_loss: 0.2102 - val_accuracy: 0.9257\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1726 - accuracy: 0.9359 - val_loss: 0.2093 - val_accuracy: 0.9280\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1658 - accuracy: 0.9382 - val_loss: 0.1999 - val_accuracy: 0.9296\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1539 - accuracy: 0.9421 - val_loss: 0.2011 - val_accuracy: 0.9309\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1430 - accuracy: 0.9462 - val_loss: 0.2024 - val_accuracy: 0.9301\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1362 - accuracy: 0.9488 - val_loss: 0.2128 - val_accuracy: 0.9302\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1306 - accuracy: 0.9508 - val_loss: 0.2065 - val_accuracy: 0.9318\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1220 - accuracy: 0.9541 - val_loss: 0.2134 - val_accuracy: 0.9329\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1145 - accuracy: 0.9566 - val_loss: 0.2152 - val_accuracy: 0.9306\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1069 - accuracy: 0.9595 - val_loss: 0.2262 - val_accuracy: 0.9302\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1029 - accuracy: 0.9609 - val_loss: 0.2287 - val_accuracy: 0.9321\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0978 - accuracy: 0.9625 - val_loss: 0.2303 - val_accuracy: 0.9347\n",
      "Epoch 21/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0934 - accuracy: 0.9646 - val_loss: 0.2338 - val_accuracy: 0.9304\n",
      "Epoch 00021: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5541 - accuracy: 0.8064 - val_loss: 0.3535 - val_accuracy: 0.8721\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3616 - accuracy: 0.8703 - val_loss: 0.3106 - val_accuracy: 0.8855\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3149 - accuracy: 0.8870 - val_loss: 0.2678 - val_accuracy: 0.9017\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2819 - accuracy: 0.8984 - val_loss: 0.2494 - val_accuracy: 0.9058\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2535 - accuracy: 0.9076 - val_loss: 0.2369 - val_accuracy: 0.9107\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2335 - accuracy: 0.9157 - val_loss: 0.2235 - val_accuracy: 0.9166\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2151 - accuracy: 0.9207 - val_loss: 0.2230 - val_accuracy: 0.9203\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2038 - accuracy: 0.9252 - val_loss: 0.2230 - val_accuracy: 0.9185\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1905 - accuracy: 0.9292 - val_loss: 0.2124 - val_accuracy: 0.9231\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1739 - accuracy: 0.9350 - val_loss: 0.2143 - val_accuracy: 0.9258\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1621 - accuracy: 0.9379 - val_loss: 0.2174 - val_accuracy: 0.9225\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1556 - accuracy: 0.9412 - val_loss: 0.2079 - val_accuracy: 0.9282\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1453 - accuracy: 0.9448 - val_loss: 0.2127 - val_accuracy: 0.9279\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1399 - accuracy: 0.9472 - val_loss: 0.2107 - val_accuracy: 0.9289\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1292 - accuracy: 0.9508 - val_loss: 0.2220 - val_accuracy: 0.9272\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1218 - accuracy: 0.9523 - val_loss: 0.2240 - val_accuracy: 0.9268\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1174 - accuracy: 0.9546 - val_loss: 0.2227 - val_accuracy: 0.9315\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1115 - accuracy: 0.9574 - val_loss: 0.2219 - val_accuracy: 0.9308\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1060 - accuracy: 0.9599 - val_loss: 0.2300 - val_accuracy: 0.9290\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0998 - accuracy: 0.9613 - val_loss: 0.2286 - val_accuracy: 0.9304\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0968 - accuracy: 0.9633 - val_loss: 0.2577 - val_accuracy: 0.9291\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0913 - accuracy: 0.9641 - val_loss: 0.2410 - val_accuracy: 0.9283\n",
      "Epoch 00022: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5579 - accuracy: 0.8027 - val_loss: 0.3356 - val_accuracy: 0.8755\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3625 - accuracy: 0.8717 - val_loss: 0.2903 - val_accuracy: 0.8929\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3120 - accuracy: 0.8881 - val_loss: 0.2622 - val_accuracy: 0.9024\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2799 - accuracy: 0.8994 - val_loss: 0.2508 - val_accuracy: 0.9047\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2521 - accuracy: 0.9077 - val_loss: 0.2394 - val_accuracy: 0.9116\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2325 - accuracy: 0.9155 - val_loss: 0.2250 - val_accuracy: 0.9176\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2156 - accuracy: 0.9202 - val_loss: 0.2169 - val_accuracy: 0.9195\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1975 - accuracy: 0.9267 - val_loss: 0.2233 - val_accuracy: 0.9193\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1864 - accuracy: 0.9310 - val_loss: 0.2169 - val_accuracy: 0.9216\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1724 - accuracy: 0.9350 - val_loss: 0.2052 - val_accuracy: 0.9262\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1601 - accuracy: 0.9392 - val_loss: 0.2150 - val_accuracy: 0.9262\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1514 - accuracy: 0.9427 - val_loss: 0.2128 - val_accuracy: 0.9244\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1395 - accuracy: 0.9471 - val_loss: 0.2240 - val_accuracy: 0.9255\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1347 - accuracy: 0.9476 - val_loss: 0.2260 - val_accuracy: 0.9285\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1264 - accuracy: 0.9518 - val_loss: 0.2258 - val_accuracy: 0.9278\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1203 - accuracy: 0.9540 - val_loss: 0.2228 - val_accuracy: 0.9290\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1144 - accuracy: 0.9564 - val_loss: 0.2449 - val_accuracy: 0.9247\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1090 - accuracy: 0.9581 - val_loss: 0.2414 - val_accuracy: 0.9305\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1041 - accuracy: 0.9596 - val_loss: 0.2372 - val_accuracy: 0.9289\n",
      "Epoch 20/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.1016 - accuracy: 0.9606 - val_loss: 0.2259 - val_accuracy: 0.9293\n",
      "Epoch 00020: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5507 - accuracy: 0.8050 - val_loss: 0.3446 - val_accuracy: 0.8770\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3544 - accuracy: 0.8744 - val_loss: 0.2957 - val_accuracy: 0.8921\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.2984 - accuracy: 0.8930 - val_loss: 0.2700 - val_accuracy: 0.8973\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2656 - accuracy: 0.9040 - val_loss: 0.2501 - val_accuracy: 0.9084\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2433 - accuracy: 0.9105 - val_loss: 0.2358 - val_accuracy: 0.9124\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2205 - accuracy: 0.9196 - val_loss: 0.2335 - val_accuracy: 0.9152\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2033 - accuracy: 0.9254 - val_loss: 0.2304 - val_accuracy: 0.9158\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1895 - accuracy: 0.9293 - val_loss: 0.2248 - val_accuracy: 0.9208\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1742 - accuracy: 0.9352 - val_loss: 0.2245 - val_accuracy: 0.9207\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1641 - accuracy: 0.9378 - val_loss: 0.2278 - val_accuracy: 0.9207\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1531 - accuracy: 0.9421 - val_loss: 0.2252 - val_accuracy: 0.9218\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1425 - accuracy: 0.9471 - val_loss: 0.2186 - val_accuracy: 0.9245\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1337 - accuracy: 0.9484 - val_loss: 0.2264 - val_accuracy: 0.9259\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1252 - accuracy: 0.9528 - val_loss: 0.2252 - val_accuracy: 0.9244\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1154 - accuracy: 0.9561 - val_loss: 0.2338 - val_accuracy: 0.9260\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1082 - accuracy: 0.9596 - val_loss: 0.2424 - val_accuracy: 0.9260\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1057 - accuracy: 0.9597 - val_loss: 0.2339 - val_accuracy: 0.9252\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.0989 - accuracy: 0.9614 - val_loss: 0.2499 - val_accuracy: 0.9266\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0959 - accuracy: 0.9634 - val_loss: 0.2430 - val_accuracy: 0.9268\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0881 - accuracy: 0.9662 - val_loss: 0.2705 - val_accuracy: 0.9257\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0864 - accuracy: 0.9669 - val_loss: 0.2565 - val_accuracy: 0.9299\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0857 - accuracy: 0.9667 - val_loss: 0.2590 - val_accuracy: 0.9250\n",
      "Epoch 00022: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5423 - accuracy: 0.8094 - val_loss: 0.3522 - val_accuracy: 0.8730\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3530 - accuracy: 0.8730 - val_loss: 0.2929 - val_accuracy: 0.8929\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3006 - accuracy: 0.8923 - val_loss: 0.2750 - val_accuracy: 0.8983\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2725 - accuracy: 0.9015 - val_loss: 0.2491 - val_accuracy: 0.9064\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2500 - accuracy: 0.9089 - val_loss: 0.2601 - val_accuracy: 0.9041\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2331 - accuracy: 0.9137 - val_loss: 0.2315 - val_accuracy: 0.9158\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2116 - accuracy: 0.9221 - val_loss: 0.2354 - val_accuracy: 0.9116\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1980 - accuracy: 0.9251 - val_loss: 0.2258 - val_accuracy: 0.9166\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1871 - accuracy: 0.9287 - val_loss: 0.2357 - val_accuracy: 0.9137\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1730 - accuracy: 0.9368 - val_loss: 0.2191 - val_accuracy: 0.9214\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1651 - accuracy: 0.9386 - val_loss: 0.2319 - val_accuracy: 0.9137\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1525 - accuracy: 0.9419 - val_loss: 0.2231 - val_accuracy: 0.9218\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1449 - accuracy: 0.9450 - val_loss: 0.2310 - val_accuracy: 0.9234\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1349 - accuracy: 0.9479 - val_loss: 0.2386 - val_accuracy: 0.9201\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1276 - accuracy: 0.9511 - val_loss: 0.2300 - val_accuracy: 0.9241\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1249 - accuracy: 0.9527 - val_loss: 0.2403 - val_accuracy: 0.9246\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1175 - accuracy: 0.9551 - val_loss: 0.2373 - val_accuracy: 0.9218\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1097 - accuracy: 0.9577 - val_loss: 0.2350 - val_accuracy: 0.9257\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1071 - accuracy: 0.9579 - val_loss: 0.2540 - val_accuracy: 0.9225\n",
      "Epoch 20/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.1022 - accuracy: 0.9600 - val_loss: 0.2561 - val_accuracy: 0.9247\n",
      "Epoch 00020: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5586 - accuracy: 0.8042 - val_loss: 0.3417 - val_accuracy: 0.8766\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3619 - accuracy: 0.8700 - val_loss: 0.3086 - val_accuracy: 0.8849\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3156 - accuracy: 0.8862 - val_loss: 0.2556 - val_accuracy: 0.9056\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2826 - accuracy: 0.8972 - val_loss: 0.2524 - val_accuracy: 0.9079\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2576 - accuracy: 0.9073 - val_loss: 0.2255 - val_accuracy: 0.9191\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2360 - accuracy: 0.9138 - val_loss: 0.2318 - val_accuracy: 0.9175\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2188 - accuracy: 0.9191 - val_loss: 0.2271 - val_accuracy: 0.9179\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2062 - accuracy: 0.9239 - val_loss: 0.2082 - val_accuracy: 0.9259\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1932 - accuracy: 0.9272 - val_loss: 0.2124 - val_accuracy: 0.9266\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1797 - accuracy: 0.9321 - val_loss: 0.2098 - val_accuracy: 0.9289\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1696 - accuracy: 0.9356 - val_loss: 0.2097 - val_accuracy: 0.9292\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1607 - accuracy: 0.9388 - val_loss: 0.2042 - val_accuracy: 0.9325\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1522 - accuracy: 0.9430 - val_loss: 0.2182 - val_accuracy: 0.9276\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1452 - accuracy: 0.9449 - val_loss: 0.2190 - val_accuracy: 0.9302\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1366 - accuracy: 0.9478 - val_loss: 0.2138 - val_accuracy: 0.9314\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1298 - accuracy: 0.9504 - val_loss: 0.2116 - val_accuracy: 0.9315\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1236 - accuracy: 0.9533 - val_loss: 0.2120 - val_accuracy: 0.9325\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1187 - accuracy: 0.9547 - val_loss: 0.2146 - val_accuracy: 0.9335\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1127 - accuracy: 0.9559 - val_loss: 0.2203 - val_accuracy: 0.9354\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.1068 - accuracy: 0.9585 - val_loss: 0.2265 - val_accuracy: 0.9336\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.1039 - accuracy: 0.9598 - val_loss: 0.2242 - val_accuracy: 0.9348\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0978 - accuracy: 0.9624 - val_loss: 0.2414 - val_accuracy: 0.9338\n",
      "Epoch 00022: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5616 - accuracy: 0.7996 - val_loss: 0.3387 - val_accuracy: 0.8814\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3617 - accuracy: 0.8704 - val_loss: 0.2804 - val_accuracy: 0.9004\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3055 - accuracy: 0.8903 - val_loss: 0.2467 - val_accuracy: 0.9132\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2699 - accuracy: 0.9016 - val_loss: 0.2369 - val_accuracy: 0.9126\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2470 - accuracy: 0.9095 - val_loss: 0.2231 - val_accuracy: 0.9186\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2258 - accuracy: 0.9175 - val_loss: 0.2206 - val_accuracy: 0.9186\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2072 - accuracy: 0.9232 - val_loss: 0.2172 - val_accuracy: 0.9165\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1916 - accuracy: 0.9282 - val_loss: 0.2167 - val_accuracy: 0.9237\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1808 - accuracy: 0.9333 - val_loss: 0.2098 - val_accuracy: 0.9245\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1689 - accuracy: 0.9368 - val_loss: 0.2060 - val_accuracy: 0.9264\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1573 - accuracy: 0.9400 - val_loss: 0.2117 - val_accuracy: 0.9265\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1468 - accuracy: 0.9451 - val_loss: 0.2026 - val_accuracy: 0.9297\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1401 - accuracy: 0.9476 - val_loss: 0.2282 - val_accuracy: 0.9253\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1330 - accuracy: 0.9493 - val_loss: 0.2112 - val_accuracy: 0.9268\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1271 - accuracy: 0.9513 - val_loss: 0.2264 - val_accuracy: 0.9272\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1192 - accuracy: 0.9545 - val_loss: 0.2310 - val_accuracy: 0.9256\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1119 - accuracy: 0.9570 - val_loss: 0.2331 - val_accuracy: 0.9263\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1051 - accuracy: 0.9604 - val_loss: 0.2432 - val_accuracy: 0.9281\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1012 - accuracy: 0.9606 - val_loss: 0.2456 - val_accuracy: 0.9259\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.0991 - accuracy: 0.9622 - val_loss: 0.2398 - val_accuracy: 0.9299\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0942 - accuracy: 0.9640 - val_loss: 0.2449 - val_accuracy: 0.9309\n",
      "Epoch 22/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0896 - accuracy: 0.9655 - val_loss: 0.2579 - val_accuracy: 0.9283\n",
      "Epoch 00022: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5671 - accuracy: 0.8002 - val_loss: 0.3345 - val_accuracy: 0.8808\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3658 - accuracy: 0.8699 - val_loss: 0.2885 - val_accuracy: 0.8923\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3131 - accuracy: 0.8874 - val_loss: 0.2526 - val_accuracy: 0.9039\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2757 - accuracy: 0.8993 - val_loss: 0.2441 - val_accuracy: 0.9089\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2519 - accuracy: 0.9093 - val_loss: 0.2264 - val_accuracy: 0.9140\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2300 - accuracy: 0.9163 - val_loss: 0.2228 - val_accuracy: 0.9162\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2144 - accuracy: 0.9205 - val_loss: 0.2210 - val_accuracy: 0.9194\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2013 - accuracy: 0.9254 - val_loss: 0.2191 - val_accuracy: 0.9187\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1833 - accuracy: 0.9313 - val_loss: 0.2218 - val_accuracy: 0.9210\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1713 - accuracy: 0.9356 - val_loss: 0.2181 - val_accuracy: 0.9189\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1632 - accuracy: 0.9373 - val_loss: 0.2178 - val_accuracy: 0.9241\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1525 - accuracy: 0.9425 - val_loss: 0.2202 - val_accuracy: 0.9231\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1442 - accuracy: 0.9450 - val_loss: 0.2103 - val_accuracy: 0.9270\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1379 - accuracy: 0.9477 - val_loss: 0.2225 - val_accuracy: 0.9225\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1308 - accuracy: 0.9495 - val_loss: 0.2215 - val_accuracy: 0.9265\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1172 - accuracy: 0.9558 - val_loss: 0.2106 - val_accuracy: 0.9288\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1146 - accuracy: 0.9570 - val_loss: 0.2172 - val_accuracy: 0.9283\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1103 - accuracy: 0.9572 - val_loss: 0.2240 - val_accuracy: 0.9309\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1025 - accuracy: 0.9614 - val_loss: 0.2280 - val_accuracy: 0.9281\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.1009 - accuracy: 0.9619 - val_loss: 0.2259 - val_accuracy: 0.9319\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.0937 - accuracy: 0.9644 - val_loss: 0.2432 - val_accuracy: 0.9293\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0906 - accuracy: 0.9646 - val_loss: 0.2528 - val_accuracy: 0.9282\n",
      "Epoch 23/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0892 - accuracy: 0.9662 - val_loss: 0.2409 - val_accuracy: 0.9289\n",
      "Epoch 00023: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5576 - accuracy: 0.8016 - val_loss: 0.3358 - val_accuracy: 0.8760\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3623 - accuracy: 0.8712 - val_loss: 0.2771 - val_accuracy: 0.9008\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3132 - accuracy: 0.8865 - val_loss: 0.2560 - val_accuracy: 0.9053\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2815 - accuracy: 0.8981 - val_loss: 0.2454 - val_accuracy: 0.9082\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2566 - accuracy: 0.9068 - val_loss: 0.2259 - val_accuracy: 0.9141\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2344 - accuracy: 0.9136 - val_loss: 0.2179 - val_accuracy: 0.9191\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2183 - accuracy: 0.9195 - val_loss: 0.2061 - val_accuracy: 0.9226\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2003 - accuracy: 0.9265 - val_loss: 0.2069 - val_accuracy: 0.9228\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1896 - accuracy: 0.9288 - val_loss: 0.2017 - val_accuracy: 0.9257\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1772 - accuracy: 0.9336 - val_loss: 0.2019 - val_accuracy: 0.9229\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1664 - accuracy: 0.9387 - val_loss: 0.2112 - val_accuracy: 0.9253\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1552 - accuracy: 0.9414 - val_loss: 0.1994 - val_accuracy: 0.9290\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1460 - accuracy: 0.9456 - val_loss: 0.1933 - val_accuracy: 0.9296\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1381 - accuracy: 0.9477 - val_loss: 0.2103 - val_accuracy: 0.9261\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1298 - accuracy: 0.9505 - val_loss: 0.2031 - val_accuracy: 0.9303\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1246 - accuracy: 0.9527 - val_loss: 0.2099 - val_accuracy: 0.9290\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1178 - accuracy: 0.9553 - val_loss: 0.2152 - val_accuracy: 0.9318\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1126 - accuracy: 0.9572 - val_loss: 0.2171 - val_accuracy: 0.9318\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1064 - accuracy: 0.9584 - val_loss: 0.2140 - val_accuracy: 0.9304\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.1039 - accuracy: 0.9607 - val_loss: 0.2112 - val_accuracy: 0.9324\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.1005 - accuracy: 0.9614 - val_loss: 0.2221 - val_accuracy: 0.9325\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.0948 - accuracy: 0.9638 - val_loss: 0.2174 - val_accuracy: 0.9327\n",
      "Epoch 23/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0872 - accuracy: 0.9666 - val_loss: 0.2366 - val_accuracy: 0.9332\n",
      "Epoch 00023: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5459 - accuracy: 0.8076 - val_loss: 0.3531 - val_accuracy: 0.8691\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3588 - accuracy: 0.8712 - val_loss: 0.3011 - val_accuracy: 0.8892\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3069 - accuracy: 0.8903 - val_loss: 0.2671 - val_accuracy: 0.9040\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2767 - accuracy: 0.9002 - val_loss: 0.2516 - val_accuracy: 0.9094\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2515 - accuracy: 0.9084 - val_loss: 0.2459 - val_accuracy: 0.9097\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2294 - accuracy: 0.9156 - val_loss: 0.2300 - val_accuracy: 0.9184\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2118 - accuracy: 0.9219 - val_loss: 0.2213 - val_accuracy: 0.9195\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1969 - accuracy: 0.9273 - val_loss: 0.2223 - val_accuracy: 0.9215\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1831 - accuracy: 0.9330 - val_loss: 0.2223 - val_accuracy: 0.9223\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1693 - accuracy: 0.9372 - val_loss: 0.2302 - val_accuracy: 0.9184\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1619 - accuracy: 0.9396 - val_loss: 0.2246 - val_accuracy: 0.9219\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1515 - accuracy: 0.9432 - val_loss: 0.2392 - val_accuracy: 0.9212\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1433 - accuracy: 0.9462 - val_loss: 0.2327 - val_accuracy: 0.9202\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1348 - accuracy: 0.9480 - val_loss: 0.2264 - val_accuracy: 0.9231\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1264 - accuracy: 0.9515 - val_loss: 0.2322 - val_accuracy: 0.9231\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1193 - accuracy: 0.9541 - val_loss: 0.2407 - val_accuracy: 0.9205\n",
      "Epoch 17/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.1145 - accuracy: 0.9563 - val_loss: 0.2306 - val_accuracy: 0.9254\n",
      "Epoch 00017: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5674 - accuracy: 0.8001 - val_loss: 0.3592 - val_accuracy: 0.8678\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3684 - accuracy: 0.8680 - val_loss: 0.2920 - val_accuracy: 0.8904\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3207 - accuracy: 0.8854 - val_loss: 0.2644 - val_accuracy: 0.9018\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2870 - accuracy: 0.8971 - val_loss: 0.2493 - val_accuracy: 0.9085\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2640 - accuracy: 0.9043 - val_loss: 0.2382 - val_accuracy: 0.9122\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2402 - accuracy: 0.9125 - val_loss: 0.2232 - val_accuracy: 0.9173\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2278 - accuracy: 0.9165 - val_loss: 0.2242 - val_accuracy: 0.9176\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2121 - accuracy: 0.9223 - val_loss: 0.2126 - val_accuracy: 0.9208\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1982 - accuracy: 0.9259 - val_loss: 0.2189 - val_accuracy: 0.9204\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1865 - accuracy: 0.9299 - val_loss: 0.2146 - val_accuracy: 0.9215\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1726 - accuracy: 0.9359 - val_loss: 0.2024 - val_accuracy: 0.9260\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1621 - accuracy: 0.9394 - val_loss: 0.2101 - val_accuracy: 0.9244\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1563 - accuracy: 0.9411 - val_loss: 0.2148 - val_accuracy: 0.9250\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1458 - accuracy: 0.9441 - val_loss: 0.2125 - val_accuracy: 0.9272\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1417 - accuracy: 0.9468 - val_loss: 0.2160 - val_accuracy: 0.9272\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1327 - accuracy: 0.9494 - val_loss: 0.2183 - val_accuracy: 0.9274\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1237 - accuracy: 0.9533 - val_loss: 0.2130 - val_accuracy: 0.9282\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1185 - accuracy: 0.9535 - val_loss: 0.2282 - val_accuracy: 0.9267\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1161 - accuracy: 0.9552 - val_loss: 0.2233 - val_accuracy: 0.9300\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.1078 - accuracy: 0.9587 - val_loss: 0.2233 - val_accuracy: 0.9302\n",
      "Epoch 21/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.1062 - accuracy: 0.9591 - val_loss: 0.2326 - val_accuracy: 0.9317\n",
      "Epoch 00021: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5494 - accuracy: 0.8052 - val_loss: 0.3313 - val_accuracy: 0.8822\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3515 - accuracy: 0.8761 - val_loss: 0.2939 - val_accuracy: 0.8908\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3020 - accuracy: 0.8910 - val_loss: 0.2648 - val_accuracy: 0.9050\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2676 - accuracy: 0.9032 - val_loss: 0.2472 - val_accuracy: 0.9112\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2428 - accuracy: 0.9107 - val_loss: 0.2454 - val_accuracy: 0.9098\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2205 - accuracy: 0.9190 - val_loss: 0.2306 - val_accuracy: 0.9179\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2045 - accuracy: 0.9240 - val_loss: 0.2277 - val_accuracy: 0.9167\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.1885 - accuracy: 0.9290 - val_loss: 0.2352 - val_accuracy: 0.9186\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1771 - accuracy: 0.9336 - val_loss: 0.2271 - val_accuracy: 0.9205\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1631 - accuracy: 0.9395 - val_loss: 0.2249 - val_accuracy: 0.9220\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1553 - accuracy: 0.9413 - val_loss: 0.2368 - val_accuracy: 0.9195\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1402 - accuracy: 0.9468 - val_loss: 0.2303 - val_accuracy: 0.9260\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1349 - accuracy: 0.9490 - val_loss: 0.2314 - val_accuracy: 0.9264\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1244 - accuracy: 0.9521 - val_loss: 0.2282 - val_accuracy: 0.9286\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1214 - accuracy: 0.9533 - val_loss: 0.2361 - val_accuracy: 0.9265\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1118 - accuracy: 0.9571 - val_loss: 0.2287 - val_accuracy: 0.9280\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1064 - accuracy: 0.9597 - val_loss: 0.2512 - val_accuracy: 0.9245\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1060 - accuracy: 0.9588 - val_loss: 0.2529 - val_accuracy: 0.9240\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.0955 - accuracy: 0.9641 - val_loss: 0.2462 - val_accuracy: 0.9283\n",
      "Epoch 20/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0938 - accuracy: 0.9646 - val_loss: 0.2535 - val_accuracy: 0.9282\n",
      "Epoch 00020: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5805 - accuracy: 0.7952 - val_loss: 0.3353 - val_accuracy: 0.8802\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3749 - accuracy: 0.8676 - val_loss: 0.2923 - val_accuracy: 0.8948\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3199 - accuracy: 0.8864 - val_loss: 0.2592 - val_accuracy: 0.9073\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2877 - accuracy: 0.8961 - val_loss: 0.2463 - val_accuracy: 0.9081\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2634 - accuracy: 0.9039 - val_loss: 0.2309 - val_accuracy: 0.9164\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2386 - accuracy: 0.9121 - val_loss: 0.2224 - val_accuracy: 0.9180\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2224 - accuracy: 0.9187 - val_loss: 0.2197 - val_accuracy: 0.9200\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2051 - accuracy: 0.9250 - val_loss: 0.2127 - val_accuracy: 0.9247\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1910 - accuracy: 0.9298 - val_loss: 0.2093 - val_accuracy: 0.9248\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1799 - accuracy: 0.9334 - val_loss: 0.2166 - val_accuracy: 0.9250\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1692 - accuracy: 0.9371 - val_loss: 0.2072 - val_accuracy: 0.9266\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1540 - accuracy: 0.9425 - val_loss: 0.2151 - val_accuracy: 0.9246\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1470 - accuracy: 0.9448 - val_loss: 0.2123 - val_accuracy: 0.9265\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1405 - accuracy: 0.9474 - val_loss: 0.2217 - val_accuracy: 0.9274\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1320 - accuracy: 0.9501 - val_loss: 0.2136 - val_accuracy: 0.9283\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1241 - accuracy: 0.9532 - val_loss: 0.2305 - val_accuracy: 0.9270\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1202 - accuracy: 0.9541 - val_loss: 0.2218 - val_accuracy: 0.9296\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1151 - accuracy: 0.9557 - val_loss: 0.2230 - val_accuracy: 0.9283\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1056 - accuracy: 0.9599 - val_loss: 0.2236 - val_accuracy: 0.9283\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.1034 - accuracy: 0.9599 - val_loss: 0.2312 - val_accuracy: 0.9305\n",
      "Epoch 21/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0988 - accuracy: 0.9618 - val_loss: 0.2417 - val_accuracy: 0.9295\n",
      "Epoch 00021: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5638 - accuracy: 0.8026 - val_loss: 0.3489 - val_accuracy: 0.8700\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3681 - accuracy: 0.8703 - val_loss: 0.3002 - val_accuracy: 0.8890\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3133 - accuracy: 0.8863 - val_loss: 0.2695 - val_accuracy: 0.8995\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2817 - accuracy: 0.8969 - val_loss: 0.2544 - val_accuracy: 0.9061\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2540 - accuracy: 0.9081 - val_loss: 0.2469 - val_accuracy: 0.9114\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2354 - accuracy: 0.9133 - val_loss: 0.2331 - val_accuracy: 0.9142\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2163 - accuracy: 0.9207 - val_loss: 0.2210 - val_accuracy: 0.9229\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2036 - accuracy: 0.9250 - val_loss: 0.2204 - val_accuracy: 0.9214\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1898 - accuracy: 0.9280 - val_loss: 0.2171 - val_accuracy: 0.9237\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1776 - accuracy: 0.9331 - val_loss: 0.2172 - val_accuracy: 0.9222\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1682 - accuracy: 0.9371 - val_loss: 0.2217 - val_accuracy: 0.9206\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1584 - accuracy: 0.9401 - val_loss: 0.2205 - val_accuracy: 0.9260\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1484 - accuracy: 0.9446 - val_loss: 0.2267 - val_accuracy: 0.9274\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1416 - accuracy: 0.9455 - val_loss: 0.2284 - val_accuracy: 0.9244\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1357 - accuracy: 0.9496 - val_loss: 0.2267 - val_accuracy: 0.9295\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1229 - accuracy: 0.9533 - val_loss: 0.2317 - val_accuracy: 0.9296\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1234 - accuracy: 0.9529 - val_loss: 0.2340 - val_accuracy: 0.9285\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1134 - accuracy: 0.9559 - val_loss: 0.2330 - val_accuracy: 0.9282\n",
      "Epoch 19/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.1091 - accuracy: 0.9577 - val_loss: 0.2430 - val_accuracy: 0.9297\n",
      "Epoch 00019: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5754 - accuracy: 0.7974 - val_loss: 0.3435 - val_accuracy: 0.8720\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3689 - accuracy: 0.8693 - val_loss: 0.2862 - val_accuracy: 0.8892\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3160 - accuracy: 0.8860 - val_loss: 0.2671 - val_accuracy: 0.8992\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2818 - accuracy: 0.8973 - val_loss: 0.2424 - val_accuracy: 0.9081\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2563 - accuracy: 0.9065 - val_loss: 0.2287 - val_accuracy: 0.9142\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2340 - accuracy: 0.9150 - val_loss: 0.2143 - val_accuracy: 0.9178\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2172 - accuracy: 0.9184 - val_loss: 0.2171 - val_accuracy: 0.9182\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2002 - accuracy: 0.9261 - val_loss: 0.2167 - val_accuracy: 0.9238\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.1915 - accuracy: 0.9290 - val_loss: 0.2033 - val_accuracy: 0.9248\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1784 - accuracy: 0.9341 - val_loss: 0.2078 - val_accuracy: 0.9233\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1653 - accuracy: 0.9388 - val_loss: 0.2051 - val_accuracy: 0.9275\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1568 - accuracy: 0.9411 - val_loss: 0.2043 - val_accuracy: 0.9307\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1449 - accuracy: 0.9457 - val_loss: 0.2075 - val_accuracy: 0.9303\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1355 - accuracy: 0.9489 - val_loss: 0.2135 - val_accuracy: 0.9273\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1291 - accuracy: 0.9516 - val_loss: 0.2059 - val_accuracy: 0.9311\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1242 - accuracy: 0.9528 - val_loss: 0.2105 - val_accuracy: 0.9287\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1161 - accuracy: 0.9562 - val_loss: 0.2127 - val_accuracy: 0.9318\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1094 - accuracy: 0.9577 - val_loss: 0.2263 - val_accuracy: 0.9302\n",
      "Epoch 19/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.1051 - accuracy: 0.9591 - val_loss: 0.2207 - val_accuracy: 0.9314\n",
      "Epoch 00019: early stopping\n",
      "Train on 50000 samples, validate on 10000 samples\n",
      "Epoch 1/100\n",
      "50000/50000 - 4s - loss: 0.5779 - accuracy: 0.7940 - val_loss: 0.3542 - val_accuracy: 0.8728\n",
      "Epoch 2/100\n",
      "50000/50000 - 3s - loss: 0.3768 - accuracy: 0.8651 - val_loss: 0.2953 - val_accuracy: 0.8937\n",
      "Epoch 3/100\n",
      "50000/50000 - 3s - loss: 0.3280 - accuracy: 0.8815 - val_loss: 0.2683 - val_accuracy: 0.9014\n",
      "Epoch 4/100\n",
      "50000/50000 - 3s - loss: 0.2948 - accuracy: 0.8934 - val_loss: 0.2535 - val_accuracy: 0.9054\n",
      "Epoch 5/100\n",
      "50000/50000 - 3s - loss: 0.2708 - accuracy: 0.9006 - val_loss: 0.2422 - val_accuracy: 0.9116\n",
      "Epoch 6/100\n",
      "50000/50000 - 3s - loss: 0.2523 - accuracy: 0.9079 - val_loss: 0.2369 - val_accuracy: 0.9140\n",
      "Epoch 7/100\n",
      "50000/50000 - 3s - loss: 0.2348 - accuracy: 0.9127 - val_loss: 0.2278 - val_accuracy: 0.9194\n",
      "Epoch 8/100\n",
      "50000/50000 - 3s - loss: 0.2194 - accuracy: 0.9185 - val_loss: 0.2208 - val_accuracy: 0.9211\n",
      "Epoch 9/100\n",
      "50000/50000 - 3s - loss: 0.2052 - accuracy: 0.9238 - val_loss: 0.2229 - val_accuracy: 0.9205\n",
      "Epoch 10/100\n",
      "50000/50000 - 3s - loss: 0.1934 - accuracy: 0.9278 - val_loss: 0.2236 - val_accuracy: 0.9243\n",
      "Epoch 11/100\n",
      "50000/50000 - 3s - loss: 0.1820 - accuracy: 0.9330 - val_loss: 0.2174 - val_accuracy: 0.9222\n",
      "Epoch 12/100\n",
      "50000/50000 - 3s - loss: 0.1750 - accuracy: 0.9341 - val_loss: 0.2131 - val_accuracy: 0.9271\n",
      "Epoch 13/100\n",
      "50000/50000 - 3s - loss: 0.1641 - accuracy: 0.9374 - val_loss: 0.2315 - val_accuracy: 0.9260\n",
      "Epoch 14/100\n",
      "50000/50000 - 3s - loss: 0.1543 - accuracy: 0.9406 - val_loss: 0.2141 - val_accuracy: 0.9264\n",
      "Epoch 15/100\n",
      "50000/50000 - 3s - loss: 0.1471 - accuracy: 0.9435 - val_loss: 0.2094 - val_accuracy: 0.9293\n",
      "Epoch 16/100\n",
      "50000/50000 - 3s - loss: 0.1379 - accuracy: 0.9483 - val_loss: 0.2244 - val_accuracy: 0.9310\n",
      "Epoch 17/100\n",
      "50000/50000 - 3s - loss: 0.1345 - accuracy: 0.9489 - val_loss: 0.2213 - val_accuracy: 0.9287\n",
      "Epoch 18/100\n",
      "50000/50000 - 3s - loss: 0.1290 - accuracy: 0.9513 - val_loss: 0.2201 - val_accuracy: 0.9263\n",
      "Epoch 19/100\n",
      "50000/50000 - 3s - loss: 0.1218 - accuracy: 0.9531 - val_loss: 0.2248 - val_accuracy: 0.9309\n",
      "Epoch 20/100\n",
      "50000/50000 - 3s - loss: 0.1180 - accuracy: 0.9548 - val_loss: 0.2339 - val_accuracy: 0.9322\n",
      "Epoch 21/100\n",
      "50000/50000 - 3s - loss: 0.1129 - accuracy: 0.9569 - val_loss: 0.2349 - val_accuracy: 0.9306\n",
      "Epoch 22/100\n",
      "50000/50000 - 3s - loss: 0.1087 - accuracy: 0.9577 - val_loss: 0.2497 - val_accuracy: 0.9298\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 23/100\n",
      "50000/50000 - 3s - loss: 0.1040 - accuracy: 0.9604 - val_loss: 0.2477 - val_accuracy: 0.9295\n",
      "Epoch 24/100\n",
      "50000/50000 - 3s - loss: 0.1025 - accuracy: 0.9593 - val_loss: 0.2413 - val_accuracy: 0.9333\n",
      "Epoch 25/100\n",
      "Restoring model weights from the end of the best epoch.\n",
      "50000/50000 - 3s - loss: 0.0965 - accuracy: 0.9624 - val_loss: 0.2610 - val_accuracy: 0.9327\n",
      "Epoch 00025: early stopping\n"
     ]
    }
   ],
   "source": [
    "from vpbnn import vlayers\n",
    "from vpbnn.models import nn2vpbnn\n",
    "\n",
    "for last_activation in LAST_ACTIVATIONS:\n",
    "    for seed in range(args.max_seed):\n",
    "        set_experimental_environment(seed)\n",
    "        \n",
    "        (x_train, y_train), (x_val, y_val), (_, _), num_classes = load_dataset(\n",
    "            args.train_domain\n",
    "        )        \n",
    "        \n",
    "        input_shape = x_train.shape[1:]\n",
    "        model = create_bayes_model(\n",
    "            args.bayes_model, input_shape, num_classes, last_activation\n",
    "        )\n",
    "\n",
    "        if last_activation == \"softmax\":\n",
    "            loss_func = \"categorical_crossentropy\"\n",
    "            calc_entropy = calc_softmax_entropy\n",
    "        elif last_activation == \"sigmoid\":\n",
    "            loss_func = \"binary_crossentropy\"\n",
    "            calc_entropy = calc_sigmoid_entropy\n",
    "        else:\n",
    "            raise ValueError()\n",
    "\n",
    "        es = keras.callbacks.EarlyStopping(\n",
    "            monitor=\"val_loss\",\n",
    "            min_delta=0,\n",
    "            patience=args.patience,\n",
    "            verbose=1,\n",
    "            mode=\"auto\",\n",
    "            restore_best_weights=True,\n",
    "        )\n",
    "\n",
    "        model.compile(\n",
    "            loss=loss_func, optimizer=args.optimizer, metrics=[\"accuracy\"],\n",
    "        )\n",
    "\n",
    "        model.fit(\n",
    "            x_train,\n",
    "            y_train,\n",
    "            batch_size=args.train_batch_size,\n",
    "            epochs=args.max_epochs,\n",
    "            verbose=2,\n",
    "            validation_data=(x_val, y_val),\n",
    "            callbacks=[es],\n",
    "        )\n",
    "        \n",
    "        if last_activation == \"sigmoid\":\n",
    "            best_rho = None\n",
    "            best_ll = -np.inf\n",
    "            for rho in args.rhos:\n",
    "                vmodel = nn2vpbnn(model, variance_mode=3, rho=rho)\n",
    "                y_prob, y_var = vmodel.predict(x_val, batch_size=args.test_batch_size)\n",
    "                ll = -0.5 * np.square(y_val - y_prob) / np.maximum(y_var, 1.0e-7) - 0.5 * np.log(2.0 * np.pi) - 0.5 * np.log(np.maximum(y_var, 1.0e-7))\n",
    "                ll = ll.mean()\n",
    "                print(\"rho: {0}, ll: {1}\".format(rho, ll))                \n",
    "                if ll > best_ll:\n",
    "                    best_rho = rho\n",
    "                    best_ll = ll\n",
    "            print(\"best_rho: {0}, best_ll: {1}\".format(best_rho, best_ll))\n",
    "            vmodel = nn2vpbnn(model, rho=best_rho)\n",
    "        else:\n",
    "            vmodel = nn2vpbnn(model)\n",
    "        \n",
    "        score_dfs = []\n",
    "        for prediction_type in PREDICTION_MODES:\n",
    "            for test_domain in DATASETS:\n",
    "                (_, _), (_, _), (x_test, y_test), _ = load_dataset(test_domain)\n",
    "                if prediction_type == PREDICTION_MODES[0]:\n",
    "                    for layer in vmodel.layers:\n",
    "                        if isinstance(layer, vlayers.VarianceLayer):\n",
    "                            layer.variance_mode = 0\n",
    "                    vmodel.compile(loss=loss_func)\n",
    "\n",
    "                    y_prob, _ = vmodel.predict(x_test, batch_size=args.test_batch_size)\n",
    "                    y_pred = np.argmax(y_prob, axis=-1)\n",
    "                    y_entropy = calc_entropy(y_prob)\n",
    "                    score_df = pd.DataFrame(\n",
    "                        {\n",
    "                            \"prediction_type\": \"{0}_{1}\".format(prediction_type, \"entropy\"),\n",
    "                            \"train_domain\": args.train_domain,\n",
    "                            \"test_domain\": test_domain,\n",
    "                            \"y_index\": np.arange(x_test.shape[0]),\n",
    "                            \"y_test\": np.argmax(y_test, axis=-1),\n",
    "                            \"y_pred\": y_pred,\n",
    "                            \"y_prob\": np.max(y_prob, axis=-1),\n",
    "                            \"y_uncertainty\": y_entropy,\n",
    "                        }\n",
    "                    )\n",
    "                    score_dfs.append(score_df)\n",
    "                elif prediction_type == PREDICTION_MODES[1]:\n",
    "                    for layer in vmodel.layers:\n",
    "                        if isinstance(layer, vlayers.VarianceLayer):\n",
    "                            layer.variance_mode = 1\n",
    "                    vmodel.compile(loss=loss_func)\n",
    "\n",
    "                    y_prob, y_var = vmodel.predict(x_test, batch_size=args.test_batch_size)\n",
    "                    y_pred = np.argmax(y_prob, axis=-1)\n",
    "                    y_entropy = calc_entropy(y_prob)\n",
    "                    y_mean_std = np.mean(np.sqrt(y_var), axis=-1)\n",
    "                    score_df = pd.DataFrame(\n",
    "                        {\n",
    "                            \"prediction_type\": \"{0}_{1}\".format(prediction_type, \"entropy\"),\n",
    "                            \"train_domain\": args.train_domain,\n",
    "                            \"test_domain\": test_domain,\n",
    "                            \"y_index\": np.arange(x_test.shape[0]),\n",
    "                            \"y_test\": np.argmax(y_test, axis=-1),\n",
    "                            \"y_pred\": y_pred,\n",
    "                            \"y_prob\": np.max(y_prob, axis=-1),\n",
    "                            \"y_uncertainty\": y_entropy,\n",
    "                        }\n",
    "                    )\n",
    "                    score_dfs.append(score_df)\n",
    "                    score_df = pd.DataFrame(\n",
    "                        {\n",
    "                            \"prediction_type\": \"{0}_{1}\".format(\n",
    "                                prediction_type, \"mean-std\"\n",
    "                            ),\n",
    "                            \"train_domain\": args.train_domain,\n",
    "                            \"test_domain\": test_domain,\n",
    "                            \"y_index\": np.arange(x_test.shape[0]),\n",
    "                            \"y_test\": np.argmax(y_test, axis=-1),\n",
    "                            \"y_pred\": y_pred,\n",
    "                            \"y_prob\": np.max(y_prob, axis=-1),\n",
    "                            \"y_uncertainty\": y_mean_std,\n",
    "                        }\n",
    "                    )\n",
    "                    score_dfs.append(score_df)\n",
    "                elif prediction_type == PREDICTION_MODES[2]:\n",
    "                    for layer in vmodel.layers:\n",
    "                        if isinstance(layer, vlayers.VarianceLayer):\n",
    "                            layer.variance_mode = 2\n",
    "                    vmodel.compile(loss=loss_func)\n",
    "\n",
    "                    y_prob, y_var = vmodel.predict(x_test, batch_size=args.test_batch_size)\n",
    "                    y_pred = np.argmax(y_prob, axis=-1)\n",
    "                    y_entropy = calc_entropy(y_prob)\n",
    "                    y_mean_std = np.mean(np.sqrt(y_var), axis=-1)\n",
    "                    score_df = pd.DataFrame(\n",
    "                        {\n",
    "                            \"prediction_type\": \"{0}_{1}\".format(prediction_type, \"entropy\"),\n",
    "                            \"train_domain\": args.train_domain,\n",
    "                            \"test_domain\": test_domain,\n",
    "                            \"y_index\": np.arange(x_test.shape[0]),\n",
    "                            \"y_test\": np.argmax(y_test, axis=-1),\n",
    "                            \"y_pred\": y_pred,\n",
    "                            \"y_prob\": np.max(y_prob, axis=-1),\n",
    "                            \"y_uncertainty\": y_entropy,\n",
    "                        }\n",
    "                    )\n",
    "                    score_dfs.append(score_df)\n",
    "                    score_df = pd.DataFrame(\n",
    "                        {\n",
    "                            \"prediction_type\": \"{0}_{1}\".format(\n",
    "                                prediction_type, \"mean-std\"\n",
    "                            ),\n",
    "                            \"train_domain\": args.train_domain,\n",
    "                            \"test_domain\": test_domain,\n",
    "                            \"y_index\": np.arange(x_test.shape[0]),\n",
    "                            \"y_test\": np.argmax(y_test, axis=-1),\n",
    "                            \"y_pred\": y_pred,\n",
    "                            \"y_prob\": np.max(y_prob, axis=-1),\n",
    "                            \"y_uncertainty\": y_mean_std,\n",
    "                        }\n",
    "                    )\n",
    "                    score_dfs.append(score_df)\n",
    "                elif prediction_type == PREDICTION_MODES[3]:\n",
    "                    for layer in vmodel.layers:\n",
    "                        if isinstance(layer, vlayers.VarianceLayer):\n",
    "                            layer.variance_mode = 3\n",
    "                    vmodel.compile(loss=loss_func)\n",
    "\n",
    "                    y_prob, y_var = vmodel.predict(x_test, batch_size=args.test_batch_size)\n",
    "                    y_pred = np.argmax(y_prob, axis=-1)\n",
    "                    y_entropy = calc_entropy(y_prob)\n",
    "                    y_mean_std = np.mean(np.sqrt(y_var), axis=-1)\n",
    "                    score_df = pd.DataFrame(\n",
    "                        {\n",
    "                            \"prediction_type\": \"{0}_{1}\".format(prediction_type, \"entropy\"),\n",
    "                            \"train_domain\": args.train_domain,\n",
    "                            \"test_domain\": test_domain,\n",
    "                            \"y_index\": np.arange(x_test.shape[0]),\n",
    "                            \"y_test\": np.argmax(y_test, axis=-1),\n",
    "                            \"y_pred\": y_pred,\n",
    "                            \"y_prob\": np.max(y_prob, axis=-1),\n",
    "                            \"y_uncertainty\": y_entropy,\n",
    "                        }\n",
    "                    )\n",
    "                    score_dfs.append(score_df)\n",
    "                    score_df = pd.DataFrame(\n",
    "                        {\n",
    "                            \"prediction_type\": \"{0}_{1}\".format(\n",
    "                                prediction_type, \"mean-std\"\n",
    "                            ),\n",
    "                            \"train_domain\": args.train_domain,\n",
    "                            \"test_domain\": test_domain,\n",
    "                            \"y_index\": np.arange(x_test.shape[0]),\n",
    "                            \"y_test\": np.argmax(y_test, axis=-1),\n",
    "                            \"y_pred\": y_pred,\n",
    "                            \"y_prob\": np.max(y_prob, axis=-1),\n",
    "                            \"y_uncertainty\": y_mean_std,\n",
    "                        }\n",
    "                    )\n",
    "                    score_dfs.append(score_df)            \n",
    "                elif prediction_type == PREDICTION_MODES[4]:\n",
    "                    for layer in vmodel.layers:\n",
    "                        if isinstance(layer, vlayers.VarianceLayer):\n",
    "                            layer.variance_mode = 4\n",
    "                    vmodel.compile(loss=loss_func)\n",
    "\n",
    "                    y_probs = []\n",
    "                    for _ in range(args.num_mc):\n",
    "                        for layer in model.layers:\n",
    "                            if isinstance(layer, vlayers.VarianceDropout):\n",
    "                                layer.update_dropout_mask()\n",
    "                        y_prob, _ = vmodel.predict(x_test, batch_size=args.test_batch_size)\n",
    "                        y_probs.append(y_prob)\n",
    "                    y_probs = np.array(y_probs)\n",
    "                    y_prob = y_probs.mean(axis=0)\n",
    "                    y_pred = np.argmax(y_prob, axis=-1)\n",
    "                    y_entropy = calc_entropy(y_prob)\n",
    "                    y_mean_std = np.mean(y_probs.std(axis=0, ddof=1), axis=-1)\n",
    "                    score_df = pd.DataFrame(\n",
    "                        {\n",
    "                            \"prediction_type\": \"{0}_{1}\".format(prediction_type, \"entropy\"),\n",
    "                            \"train_domain\": args.train_domain,\n",
    "                            \"test_domain\": test_domain,\n",
    "                            \"y_index\": np.arange(x_test.shape[0]),\n",
    "                            \"y_test\": np.argmax(y_test, axis=-1),\n",
    "                            \"y_pred\": y_pred,\n",
    "                            \"y_prob\": np.max(y_prob, axis=-1),\n",
    "                            \"y_uncertainty\": y_entropy,\n",
    "                        }\n",
    "                    )\n",
    "                    score_dfs.append(score_df)\n",
    "                    score_df = pd.DataFrame(\n",
    "                        {\n",
    "                            \"prediction_type\": \"{0}_{1}\".format(\n",
    "                                prediction_type, \"mean-std\"\n",
    "                            ),\n",
    "                            \"train_domain\": args.train_domain,\n",
    "                            \"test_domain\": test_domain,\n",
    "                            \"y_index\": np.arange(x_test.shape[0]),\n",
    "                            \"y_test\": np.argmax(y_test, axis=-1),\n",
    "                            \"y_pred\": y_pred,\n",
    "                            \"y_prob\": np.max(y_prob, axis=-1),\n",
    "                            \"y_uncertainty\": y_mean_std,\n",
    "                        }\n",
    "                    )\n",
    "                    score_dfs.append(score_df)\n",
    "                else:\n",
    "                    raise ValueError()\n",
    "        score_df = pd.concat(score_dfs, ignore_index=True)\n",
    "        score_df[\"seed\"] = seed\n",
    "        score_df[\"bayes_model\"] = args.bayes_model\n",
    "        score_df[\"last_activation\"] = last_activation    \n",
    "        score_df.to_csv(\"score/ood_{0}_{1:03}.csv\".format(last_activation, seed), index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<svg height=\"719pt\" viewBox=\"0.00 0.00 353.00 719.00\" width=\"353pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g class=\"graph\" id=\"graph0\" transform=\"scale(1 1) rotate(0) translate(4 715)\">\n",
       "<title>G</title>\n",
       "<polygon fill=\"white\" points=\"-4,4 -4,-715 349,-715 349,4 -4,4\" stroke=\"none\"/>\n",
       "<!-- 2213129571072 -->\n",
       "<g class=\"node\" id=\"node1\"><title>2213129571072</title>\n",
       "<polygon fill=\"none\" points=\"33,-664.5 33,-710.5 312,-710.5 312,-664.5 33,-664.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"96\" y=\"-683.8\">input_1: InputLayer</text>\n",
       "<polyline fill=\"none\" points=\"159,-664.5 159,-710.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"187\" y=\"-695.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"159,-687.5 215,-687.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"187\" y=\"-672.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"215,-664.5 215,-710.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"263.5\" y=\"-695.3\">[(?, 28, 28, 1)]</text>\n",
       "<polyline fill=\"none\" points=\"215,-687.5 312,-687.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"263.5\" y=\"-672.3\">[(?, 28, 28, 1)]</text>\n",
       "</g>\n",
       "<!-- 2213129571800 -->\n",
       "<g class=\"node\" id=\"node2\"><title>2213129571800</title>\n",
       "<polygon fill=\"none\" points=\"40.5,-581.5 40.5,-627.5 304.5,-627.5 304.5,-581.5 40.5,-581.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"97\" y=\"-600.8\">conv2d: Conv2D</text>\n",
       "<polyline fill=\"none\" points=\"153.5,-581.5 153.5,-627.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"181.5\" y=\"-612.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"153.5,-604.5 209.5,-604.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"181.5\" y=\"-589.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"209.5,-581.5 209.5,-627.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"257\" y=\"-612.3\">(?, 28, 28, 1)</text>\n",
       "<polyline fill=\"none\" points=\"209.5,-604.5 304.5,-604.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"257\" y=\"-589.3\">(?, 26, 26, 32)</text>\n",
       "</g>\n",
       "<!-- 2213129571072&#45;&gt;2213129571800 -->\n",
       "<g class=\"edge\" id=\"edge1\"><title>2213129571072-&gt;2213129571800</title>\n",
       "<path d=\"M172.5,-664.366C172.5,-656.152 172.5,-646.658 172.5,-637.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"176,-637.607 172.5,-627.607 169,-637.607 176,-637.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2213129571520 -->\n",
       "<g class=\"node\" id=\"node3\"><title>2213129571520</title>\n",
       "<polygon fill=\"none\" points=\"33.5,-498.5 33.5,-544.5 311.5,-544.5 311.5,-498.5 33.5,-498.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"97\" y=\"-517.8\">conv2d_1: Conv2D</text>\n",
       "<polyline fill=\"none\" points=\"160.5,-498.5 160.5,-544.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"188.5\" y=\"-529.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"160.5,-521.5 216.5,-521.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"188.5\" y=\"-506.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"216.5,-498.5 216.5,-544.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"264\" y=\"-529.3\">(?, 26, 26, 32)</text>\n",
       "<polyline fill=\"none\" points=\"216.5,-521.5 311.5,-521.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"264\" y=\"-506.3\">(?, 24, 24, 64)</text>\n",
       "</g>\n",
       "<!-- 2213129571800&#45;&gt;2213129571520 -->\n",
       "<g class=\"edge\" id=\"edge2\"><title>2213129571800-&gt;2213129571520</title>\n",
       "<path d=\"M172.5,-581.366C172.5,-573.152 172.5,-563.658 172.5,-554.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"176,-554.607 172.5,-544.607 169,-554.607 176,-554.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2212804663840 -->\n",
       "<g class=\"node\" id=\"node4\"><title>2212804663840</title>\n",
       "<polygon fill=\"none\" points=\"0,-415.5 0,-461.5 345,-461.5 345,-415.5 0,-415.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"97\" y=\"-434.8\">max_pooling2d: MaxPooling2D</text>\n",
       "<polyline fill=\"none\" points=\"194,-415.5 194,-461.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"222\" y=\"-446.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"194,-438.5 250,-438.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"222\" y=\"-423.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"250,-415.5 250,-461.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"297.5\" y=\"-446.3\">(?, 24, 24, 64)</text>\n",
       "<polyline fill=\"none\" points=\"250,-438.5 345,-438.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"297.5\" y=\"-423.3\">(?, 12, 12, 64)</text>\n",
       "</g>\n",
       "<!-- 2213129571520&#45;&gt;2212804663840 -->\n",
       "<g class=\"edge\" id=\"edge3\"><title>2213129571520-&gt;2212804663840</title>\n",
       "<path d=\"M172.5,-498.366C172.5,-490.152 172.5,-480.658 172.5,-471.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"176,-471.607 172.5,-461.607 169,-471.607 176,-471.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2212804663280 -->\n",
       "<g class=\"node\" id=\"node5\"><title>2212804663280</title>\n",
       "<polygon fill=\"none\" points=\"39,-332.5 39,-378.5 306,-378.5 306,-332.5 39,-332.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"97\" y=\"-351.8\">dropout: Dropout</text>\n",
       "<polyline fill=\"none\" points=\"155,-332.5 155,-378.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"183\" y=\"-363.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"155,-355.5 211,-355.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"183\" y=\"-340.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"211,-332.5 211,-378.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"258.5\" y=\"-363.3\">(?, 12, 12, 64)</text>\n",
       "<polyline fill=\"none\" points=\"211,-355.5 306,-355.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"258.5\" y=\"-340.3\">(?, 12, 12, 64)</text>\n",
       "</g>\n",
       "<!-- 2212804663840&#45;&gt;2212804663280 -->\n",
       "<g class=\"edge\" id=\"edge4\"><title>2212804663840-&gt;2212804663280</title>\n",
       "<path d=\"M172.5,-415.366C172.5,-407.152 172.5,-397.658 172.5,-388.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"176,-388.607 172.5,-378.607 169,-388.607 176,-388.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2213900019584 -->\n",
       "<g class=\"node\" id=\"node6\"><title>2213900019584</title>\n",
       "<polygon fill=\"none\" points=\"49.5,-249.5 49.5,-295.5 295.5,-295.5 295.5,-249.5 49.5,-249.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"97\" y=\"-268.8\">flatten: Flatten</text>\n",
       "<polyline fill=\"none\" points=\"144.5,-249.5 144.5,-295.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"172.5\" y=\"-280.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"144.5,-272.5 200.5,-272.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"172.5\" y=\"-257.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"200.5,-249.5 200.5,-295.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"248\" y=\"-280.3\">(?, 12, 12, 64)</text>\n",
       "<polyline fill=\"none\" points=\"200.5,-272.5 295.5,-272.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"248\" y=\"-257.3\">(?, 9216)</text>\n",
       "</g>\n",
       "<!-- 2212804663280&#45;&gt;2213900019584 -->\n",
       "<g class=\"edge\" id=\"edge5\"><title>2212804663280-&gt;2213900019584</title>\n",
       "<path d=\"M172.5,-332.366C172.5,-324.152 172.5,-314.658 172.5,-305.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"176,-305.607 172.5,-295.607 169,-305.607 176,-305.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2213900021488 -->\n",
       "<g class=\"node\" id=\"node7\"><title>2213900021488</title>\n",
       "<polygon fill=\"none\" points=\"66.5,-166.5 66.5,-212.5 278.5,-212.5 278.5,-166.5 66.5,-166.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"111.5\" y=\"-185.8\">dense: Dense</text>\n",
       "<polyline fill=\"none\" points=\"156.5,-166.5 156.5,-212.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"184.5\" y=\"-197.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"156.5,-189.5 212.5,-189.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"184.5\" y=\"-174.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"212.5,-166.5 212.5,-212.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"245.5\" y=\"-197.3\">(?, 9216)</text>\n",
       "<polyline fill=\"none\" points=\"212.5,-189.5 278.5,-189.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"245.5\" y=\"-174.3\">(?, 128)</text>\n",
       "</g>\n",
       "<!-- 2213900019584&#45;&gt;2213900021488 -->\n",
       "<g class=\"edge\" id=\"edge6\"><title>2213900019584-&gt;2213900021488</title>\n",
       "<path d=\"M172.5,-249.366C172.5,-241.152 172.5,-231.658 172.5,-222.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"176,-222.607 172.5,-212.607 169,-222.607 176,-222.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2205257898640 -->\n",
       "<g class=\"node\" id=\"node8\"><title>2205257898640</title>\n",
       "<polygon fill=\"none\" points=\"50,-83.5 50,-129.5 295,-129.5 295,-83.5 50,-83.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"115\" y=\"-102.8\">dropout_1: Dropout</text>\n",
       "<polyline fill=\"none\" points=\"180,-83.5 180,-129.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"208\" y=\"-114.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"180,-106.5 236,-106.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"208\" y=\"-91.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"236,-83.5 236,-129.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"265.5\" y=\"-114.3\">(?, 128)</text>\n",
       "<polyline fill=\"none\" points=\"236,-106.5 295,-106.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"265.5\" y=\"-91.3\">(?, 128)</text>\n",
       "</g>\n",
       "<!-- 2213900021488&#45;&gt;2205257898640 -->\n",
       "<g class=\"edge\" id=\"edge7\"><title>2213900021488-&gt;2205257898640</title>\n",
       "<path d=\"M172.5,-166.366C172.5,-158.152 172.5,-148.658 172.5,-139.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"176,-139.607 172.5,-129.607 169,-139.607 176,-139.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2212198992416 -->\n",
       "<g class=\"node\" id=\"node9\"><title>2212198992416</title>\n",
       "<polygon fill=\"none\" points=\"63,-0.5 63,-46.5 282,-46.5 282,-0.5 63,-0.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"115\" y=\"-19.8\">dense_1: Dense</text>\n",
       "<polyline fill=\"none\" points=\"167,-0.5 167,-46.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"195\" y=\"-31.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"167,-23.5 223,-23.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"195\" y=\"-8.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"223,-0.5 223,-46.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"252.5\" y=\"-31.3\">(?, 128)</text>\n",
       "<polyline fill=\"none\" points=\"223,-23.5 282,-23.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"252.5\" y=\"-8.3\">(?, 10)</text>\n",
       "</g>\n",
       "<!-- 2205257898640&#45;&gt;2212198992416 -->\n",
       "<g class=\"edge\" id=\"edge8\"><title>2205257898640-&gt;2212198992416</title>\n",
       "<path d=\"M172.5,-83.3664C172.5,-75.1516 172.5,-65.6579 172.5,-56.7252\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"176,-56.6068 172.5,-46.6068 169,-56.6069 176,-56.6068\" stroke=\"black\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>"
      ],
      "text/plain": [
       "<IPython.core.display.SVG object>"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_to_dot(model, show_shapes=True).write_pdf(\"img/ood_architecture-nn.pdf\")\n",
    "SVG(model_to_dot(model, show_shapes=True, dpi=72).create(prog='dot', format='svg'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<svg height=\"885pt\" viewBox=\"0.00 0.00 536.00 885.00\" width=\"536pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g class=\"graph\" id=\"graph0\" transform=\"scale(1 1) rotate(0) translate(4 881)\">\n",
       "<title>G</title>\n",
       "<polygon fill=\"white\" points=\"-4,4 -4,-881 532,-881 532,4 -4,4\" stroke=\"none\"/>\n",
       "<!-- 2214069487656 -->\n",
       "<g class=\"node\" id=\"node1\"><title>2214069487656</title>\n",
       "<polygon fill=\"none\" points=\"127,-830.5 127,-876.5 406,-876.5 406,-830.5 127,-830.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"190\" y=\"-849.8\">input_1: InputLayer</text>\n",
       "<polyline fill=\"none\" points=\"253,-830.5 253,-876.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"281\" y=\"-861.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"253,-853.5 309,-853.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"281\" y=\"-838.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"309,-830.5 309,-876.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"357.5\" y=\"-861.3\">[(?, 28, 28, 1)]</text>\n",
       "<polyline fill=\"none\" points=\"309,-853.5 406,-853.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"357.5\" y=\"-838.3\">[(?, 28, 28, 1)]</text>\n",
       "</g>\n",
       "<!-- 2214069487600 -->\n",
       "<g class=\"node\" id=\"node2\"><title>2214069487600</title>\n",
       "<polygon fill=\"none\" points=\"134.5,-747.5 134.5,-793.5 398.5,-793.5 398.5,-747.5 134.5,-747.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"191\" y=\"-766.8\">conv2d: Conv2D</text>\n",
       "<polyline fill=\"none\" points=\"247.5,-747.5 247.5,-793.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"275.5\" y=\"-778.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"247.5,-770.5 303.5,-770.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"275.5\" y=\"-755.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"303.5,-747.5 303.5,-793.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"351\" y=\"-778.3\">(?, 28, 28, 1)</text>\n",
       "<polyline fill=\"none\" points=\"303.5,-770.5 398.5,-770.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"351\" y=\"-755.3\">(?, 26, 26, 32)</text>\n",
       "</g>\n",
       "<!-- 2214069487656&#45;&gt;2214069487600 -->\n",
       "<g class=\"edge\" id=\"edge1\"><title>2214069487656-&gt;2214069487600</title>\n",
       "<path d=\"M266.5,-830.366C266.5,-822.152 266.5,-812.658 266.5,-803.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"270,-803.607 266.5,-793.607 263,-803.607 270,-803.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2214069487040 -->\n",
       "<g class=\"node\" id=\"node3\"><title>2214069487040</title>\n",
       "<polygon fill=\"none\" points=\"127.5,-664.5 127.5,-710.5 405.5,-710.5 405.5,-664.5 127.5,-664.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"191\" y=\"-683.8\">conv2d_1: Conv2D</text>\n",
       "<polyline fill=\"none\" points=\"254.5,-664.5 254.5,-710.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"282.5\" y=\"-695.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"254.5,-687.5 310.5,-687.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"282.5\" y=\"-672.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"310.5,-664.5 310.5,-710.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"358\" y=\"-695.3\">(?, 26, 26, 32)</text>\n",
       "<polyline fill=\"none\" points=\"310.5,-687.5 405.5,-687.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"358\" y=\"-672.3\">(?, 24, 24, 64)</text>\n",
       "</g>\n",
       "<!-- 2214069487600&#45;&gt;2214069487040 -->\n",
       "<g class=\"edge\" id=\"edge2\"><title>2214069487600-&gt;2214069487040</title>\n",
       "<path d=\"M266.5,-747.366C266.5,-739.152 266.5,-729.658 266.5,-720.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"270,-720.607 266.5,-710.607 263,-720.607 270,-720.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2214069488552 -->\n",
       "<g class=\"node\" id=\"node4\"><title>2214069488552</title>\n",
       "<polygon fill=\"none\" points=\"94,-581.5 94,-627.5 439,-627.5 439,-581.5 94,-581.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"191\" y=\"-600.8\">max_pooling2d: MaxPooling2D</text>\n",
       "<polyline fill=\"none\" points=\"288,-581.5 288,-627.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"316\" y=\"-612.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"288,-604.5 344,-604.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"316\" y=\"-589.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"344,-581.5 344,-627.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"391.5\" y=\"-612.3\">(?, 24, 24, 64)</text>\n",
       "<polyline fill=\"none\" points=\"344,-604.5 439,-604.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"391.5\" y=\"-589.3\">(?, 12, 12, 64)</text>\n",
       "</g>\n",
       "<!-- 2214069487040&#45;&gt;2214069488552 -->\n",
       "<g class=\"edge\" id=\"edge3\"><title>2214069487040-&gt;2214069488552</title>\n",
       "<path d=\"M266.5,-664.366C266.5,-656.152 266.5,-646.658 266.5,-637.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"270,-637.607 266.5,-627.607 263,-637.607 270,-637.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2214069487768 -->\n",
       "<g class=\"node\" id=\"node5\"><title>2214069487768</title>\n",
       "<polygon fill=\"none\" points=\"59.5,-498.5 59.5,-544.5 317.5,-544.5 317.5,-498.5 59.5,-498.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"113\" y=\"-517.8\">zero_1: Lambda</text>\n",
       "<polyline fill=\"none\" points=\"166.5,-498.5 166.5,-544.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"194.5\" y=\"-529.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"166.5,-521.5 222.5,-521.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"194.5\" y=\"-506.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"222.5,-498.5 222.5,-544.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"270\" y=\"-529.3\">(?, 12, 12, 64)</text>\n",
       "<polyline fill=\"none\" points=\"222.5,-521.5 317.5,-521.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"270\" y=\"-506.3\">(?, 12, 12, 64)</text>\n",
       "</g>\n",
       "<!-- 2214069488552&#45;&gt;2214069487768 -->\n",
       "<g class=\"edge\" id=\"edge4\"><title>2214069488552-&gt;2214069487768</title>\n",
       "<path d=\"M245.186,-581.366C236.416,-572.259 226.132,-561.579 216.748,-551.835\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"219.246,-549.382 209.788,-544.607 214.203,-554.238 219.246,-549.382\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2214069486144 -->\n",
       "<g class=\"node\" id=\"node6\"><title>2214069486144</title>\n",
       "<polygon fill=\"none\" points=\"62,-415.5 62,-461.5 471,-461.5 471,-415.5 62,-415.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"144\" y=\"-434.8\">dropout: VarianceDropout</text>\n",
       "<polyline fill=\"none\" points=\"226,-415.5 226,-461.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"254\" y=\"-446.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"226,-438.5 282,-438.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"254\" y=\"-423.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"282,-415.5 282,-461.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"376.5\" y=\"-446.3\">[(?, 12, 12, 64), (?, 12, 12, 64)]</text>\n",
       "<polyline fill=\"none\" points=\"282,-438.5 471,-438.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"376.5\" y=\"-423.3\">[(?, 12, 12, 64), (?, 12, 12, 64)]</text>\n",
       "</g>\n",
       "<!-- 2214069488552&#45;&gt;2214069486144 -->\n",
       "<g class=\"edge\" id=\"edge5\"><title>2214069488552-&gt;2214069486144</title>\n",
       "<path d=\"M297.987,-581.419C309.192,-571.682 320.508,-559.21 326.5,-545 334.616,-525.752 334.616,-517.248 326.5,-498 321.866,-487.01 314.046,-477.059 305.557,-468.609\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"307.697,-465.82 297.987,-461.581 302.934,-470.95 307.697,-465.82\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2214069487768&#45;&gt;2214069486144 -->\n",
       "<g class=\"edge\" id=\"edge6\"><title>2214069487768-&gt;2214069486144</title>\n",
       "<path d=\"M209.814,-498.366C218.584,-489.259 228.868,-478.579 238.252,-468.835\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"240.797,-471.238 245.212,-461.607 235.754,-466.382 240.797,-471.238\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2214069486480 -->\n",
       "<g class=\"node\" id=\"node7\"><title>2214069486480</title>\n",
       "<polygon fill=\"none\" points=\"72.5,-332.5 72.5,-378.5 460.5,-378.5 460.5,-332.5 72.5,-332.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"144\" y=\"-351.8\">flatten: VarianceFlatten</text>\n",
       "<polyline fill=\"none\" points=\"215.5,-332.5 215.5,-378.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"243.5\" y=\"-363.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"215.5,-355.5 271.5,-355.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"243.5\" y=\"-340.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"271.5,-332.5 271.5,-378.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"366\" y=\"-363.3\">[(?, 12, 12, 64), (?, 12, 12, 64)]</text>\n",
       "<polyline fill=\"none\" points=\"271.5,-355.5 460.5,-355.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"366\" y=\"-340.3\">[(?, 9216), (?, 9216)]</text>\n",
       "</g>\n",
       "<!-- 2214069486144&#45;&gt;2214069486480 -->\n",
       "<g class=\"edge\" id=\"edge7\"><title>2214069486144-&gt;2214069486480</title>\n",
       "<path d=\"M266.5,-415.366C266.5,-407.152 266.5,-397.658 266.5,-388.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"270,-388.607 266.5,-378.607 263,-388.607 270,-388.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2214069485976 -->\n",
       "<g class=\"node\" id=\"node8\"><title>2214069485976</title>\n",
       "<polygon fill=\"none\" points=\"103,-249.5 103,-295.5 430,-295.5 430,-249.5 103,-249.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"172.5\" y=\"-268.8\">dense: VarianceDense</text>\n",
       "<polyline fill=\"none\" points=\"242,-249.5 242,-295.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"270\" y=\"-280.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"242,-272.5 298,-272.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"270\" y=\"-257.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"298,-249.5 298,-295.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"364\" y=\"-280.3\">[(?, 9216), (?, 9216)]</text>\n",
       "<polyline fill=\"none\" points=\"298,-272.5 430,-272.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"364\" y=\"-257.3\">[(?, 128), (?, 128)]</text>\n",
       "</g>\n",
       "<!-- 2214069486480&#45;&gt;2214069485976 -->\n",
       "<g class=\"edge\" id=\"edge8\"><title>2214069486480-&gt;2214069485976</title>\n",
       "<path d=\"M266.5,-332.366C266.5,-324.152 266.5,-314.658 266.5,-305.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"270,-305.607 266.5,-295.607 263,-305.607 270,-305.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2214069460664 -->\n",
       "<g class=\"node\" id=\"node9\"><title>2214069460664</title>\n",
       "<polygon fill=\"none\" points=\"89.5,-166.5 89.5,-212.5 443.5,-212.5 443.5,-166.5 89.5,-166.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"179\" y=\"-185.8\">dropout_1: VarianceDropout</text>\n",
       "<polyline fill=\"none\" points=\"268.5,-166.5 268.5,-212.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"296.5\" y=\"-197.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"268.5,-189.5 324.5,-189.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"296.5\" y=\"-174.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"324.5,-166.5 324.5,-212.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"384\" y=\"-197.3\">[(?, 128), (?, 128)]</text>\n",
       "<polyline fill=\"none\" points=\"324.5,-189.5 443.5,-189.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"384\" y=\"-174.3\">[(?, 128), (?, 128)]</text>\n",
       "</g>\n",
       "<!-- 2214069485976&#45;&gt;2214069460664 -->\n",
       "<g class=\"edge\" id=\"edge9\"><title>2214069485976-&gt;2214069460664</title>\n",
       "<path d=\"M266.5,-249.366C266.5,-241.152 266.5,-231.658 266.5,-222.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"270,-222.607 266.5,-212.607 263,-222.607 270,-222.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2214069460272 -->\n",
       "<g class=\"node\" id=\"node10\"><title>2214069460272</title>\n",
       "<polygon fill=\"none\" points=\"102.5,-83.5 102.5,-129.5 430.5,-129.5 430.5,-83.5 102.5,-83.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"179\" y=\"-102.8\">dense_1: VarianceDense</text>\n",
       "<polyline fill=\"none\" points=\"255.5,-83.5 255.5,-129.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"283.5\" y=\"-114.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"255.5,-106.5 311.5,-106.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"283.5\" y=\"-91.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"311.5,-83.5 311.5,-129.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"371\" y=\"-114.3\">[(?, 128), (?, 128)]</text>\n",
       "<polyline fill=\"none\" points=\"311.5,-106.5 430.5,-106.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"371\" y=\"-91.3\">[(?, 10), (?, 10)]</text>\n",
       "</g>\n",
       "<!-- 2214069460664&#45;&gt;2214069460272 -->\n",
       "<g class=\"edge\" id=\"edge10\"><title>2214069460664-&gt;2214069460272</title>\n",
       "<path d=\"M266.5,-166.366C266.5,-158.152 266.5,-148.658 266.5,-139.725\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"270,-139.607 266.5,-129.607 263,-139.607 270,-139.607\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2214069486424 -->\n",
       "<g class=\"node\" id=\"node11\"><title>2214069486424</title>\n",
       "<polygon fill=\"none\" points=\"0,-0.5 0,-46.5 261,-46.5 261,-0.5 0,-0.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"76\" y=\"-19.8\">mean_dense_1: Lambda</text>\n",
       "<polyline fill=\"none\" points=\"152,-0.5 152,-46.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"180\" y=\"-31.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"152,-23.5 208,-23.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"180\" y=\"-8.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"208,-0.5 208,-46.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"234.5\" y=\"-31.3\">(?, 10)</text>\n",
       "<polyline fill=\"none\" points=\"208,-23.5 261,-23.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"234.5\" y=\"-8.3\">(?, 10)</text>\n",
       "</g>\n",
       "<!-- 2214069460272&#45;&gt;2214069486424 -->\n",
       "<g class=\"edge\" id=\"edge11\"><title>2214069460272-&gt;2214069486424</title>\n",
       "<path d=\"M229.337,-83.3664C212.996,-73.6337 193.64,-62.1057 176.408,-51.8424\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"178,-48.7169 167.618,-46.6068 174.418,-54.731 178,-48.7169\" stroke=\"black\"/>\n",
       "</g>\n",
       "<!-- 2214069459544 -->\n",
       "<g class=\"node\" id=\"node12\"><title>2214069459544</title>\n",
       "<polygon fill=\"none\" points=\"279,-0.5 279,-46.5 528,-46.5 528,-0.5 279,-0.5\" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"349\" y=\"-19.8\">var_dense_1: Lambda</text>\n",
       "<polyline fill=\"none\" points=\"419,-0.5 419,-46.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"447\" y=\"-31.3\">input:</text>\n",
       "<polyline fill=\"none\" points=\"419,-23.5 475,-23.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"447\" y=\"-8.3\">output:</text>\n",
       "<polyline fill=\"none\" points=\"475,-0.5 475,-46.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"501.5\" y=\"-31.3\">(?, 10)</text>\n",
       "<polyline fill=\"none\" points=\"475,-23.5 528,-23.5 \" stroke=\"black\"/>\n",
       "<text font-family=\"Times New Roman,serif\" font-size=\"14.00\" text-anchor=\"middle\" x=\"501.5\" y=\"-8.3\">(?, 10)</text>\n",
       "</g>\n",
       "<!-- 2214069460272&#45;&gt;2214069459544 -->\n",
       "<g class=\"edge\" id=\"edge12\"><title>2214069460272-&gt;2214069459544</title>\n",
       "<path d=\"M303.936,-83.3664C320.397,-73.6337 339.895,-62.1057 357.254,-51.8424\" fill=\"none\" stroke=\"black\"/>\n",
       "<polygon fill=\"black\" points=\"359.283,-54.7091 366.109,-46.6068 355.72,-48.6835 359.283,-54.7091\" stroke=\"black\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>"
      ],
      "text/plain": [
       "<IPython.core.display.SVG object>"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert `model` with nn2vpbnn and keep the result as `vmodel`.\n",
    "vmodel = nn2vpbnn(model)\n",
    "\n",
    "# Write the layer graph (with shapes shown) to a PDF file.\n",
    "model_to_dot(\n",
    "    vmodel,\n",
    "    show_shapes=True,\n",
    ").write_pdf(\"img/ood_architecture-vpbnn.pdf\")\n",
    "\n",
    "# Build the same graph at 72 dpi, lay it out with the 'dot' program and\n",
    "# display the resulting SVG as the cell output.\n",
    "SVG(\n",
    "    model_to_dot(vmodel, show_shapes=True, dpi=72).create(\n",
    "        prog='dot',\n",
    "        format='svg',\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_auc(df, train_domain, test_domain):\n",
    "    \"\"\"Compute one ROC AUC per (prediction_type, bayes_model, last_activation) subset of ``df``.\n",
    "\n",
    "    Rows whose ``test_domain`` equals ``train_domain`` form the positive class and\n",
    "    the negated ``y_uncertainty`` is used as the score, so low uncertainty on\n",
    "    in-domain rows raises the AUC.\n",
    "\n",
    "    Args:\n",
    "        df: Score table with the columns ``prediction_type``, ``bayes_model``,\n",
    "            ``last_activation``, ``test_domain``, ``y_uncertainty`` and ``seed``.\n",
    "        train_domain: Label passed to ``roc_curve`` as ``pos_label``; also written\n",
    "            to the ``train_domain`` output column.\n",
    "        test_domain: Only written to the ``test_domain`` output column; it does\n",
    "            not filter ``df``.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with one row per non-empty subset and the columns\n",
    "        ``train_domain``, ``test_domain``, ``prediction_type``, ``bayes_model``,\n",
    "        ``last_activation``, ``auc`` and ``seed`` (the ``seed`` value of the\n",
    "        first row of ``df``).\n",
    "    \"\"\"\n",
    "    columns = ['train_domain', 'test_domain', 'prediction_type', 'bayes_model', 'last_activation', 'auc']\n",
    "    records = []\n",
    "    # Narrow the frame one level at a time so each filter runs once per loop\n",
    "    # level instead of three times in the innermost loop.\n",
    "    for pred_type in df['prediction_type'].unique():\n",
    "        pred_df = df[df['prediction_type'] == pred_type]\n",
    "        for bayes_model in df['bayes_model'].unique():\n",
    "            model_df = pred_df[pred_df['bayes_model'] == bayes_model]\n",
    "            for activation in df['last_activation'].unique():\n",
    "                cond_df = model_df[model_df['last_activation'] == activation]\n",
    "                if cond_df.empty:\n",
    "                    # This combination never occurs in df; roc_curve cannot score zero samples.\n",
    "                    continue\n",
    "                fpr, tpr, _ = metrics.roc_curve(cond_df['test_domain'], -cond_df['y_uncertainty'], pos_label=train_domain)\n",
    "                records.append({\n",
    "                    'train_domain': train_domain,\n",
    "                    'test_domain': test_domain,\n",
    "                    'prediction_type': pred_type,\n",
    "                    'bayes_model': bayes_model,\n",
    "                    'last_activation': activation,\n",
    "                    'auc': metrics.auc(fpr, tpr),\n",
    "                })\n",
    "\n",
    "    # Build the result once from plain records rather than concatenating one-row frames.\n",
    "    auc_df = pd.DataFrame(records, columns=columns)\n",
    "    # Scalar broadcast; for an empty df this only adds an empty 'seed' column.\n",
    "    auc_df['seed'] = df['seed'].iloc[0] if len(df) > 0 else None\n",
    "\n",
    "    return auc_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect per-seed AUC tables from every score file written to score/.\n",
    "auc_dfs = []\n",
    "for csv_filepath in sorted(glob.glob('score/*.csv')):\n",
    "    score_df = pd.read_csv(csv_filepath)\n",
    "    # unique() keeps first-appearance order, so the row order is reproducible\n",
    "    # (iterating a set of strings is not).\n",
    "    test_domains = score_df['test_domain'].unique()\n",
    "    for train_domain in score_df['train_domain'].unique():\n",
    "        is_train = score_df['train_domain'] == train_domain\n",
    "        is_in_domain = score_df['test_domain'] == train_domain\n",
    "        for other_domain in test_domains:\n",
    "            if other_domain == train_domain:\n",
    "                continue\n",
    "            # Keep this model's in-domain test rows plus the rows of one other domain.\n",
    "            cond = is_train & (is_in_domain | (score_df['test_domain'] == other_domain))\n",
    "            auc_dfs.append(calc_auc(score_df[cond], train_domain, other_domain))\n",
    "\n",
    "# Concatenate once, after all files are processed (previously this ran inside\n",
    "# the file loop). pd.concat raises ValueError here if no score rows were found,\n",
    "# instead of leaving auc_df undefined for the following cells.\n",
    "auc_df = pd.concat(auc_dfs, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">auc</th>\n",
       "      <th>auc_str</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>train_domain</th>\n",
       "      <th>test_domain</th>\n",
       "      <th>prediction_type</th>\n",
       "      <th>bayes_model</th>\n",
       "      <th>last_activation</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"72\" valign=\"top\">Fashion</th>\n",
       "      <th rowspan=\"18\" valign=\"top\">EMNIST-MNIST</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">Independent-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.855732</td>\n",
       "      <td>0.015856</td>\n",
       "      <td>0.856±0.016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.899389</td>\n",
       "      <td>0.013707</td>\n",
       "      <td>0.899±0.014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Independent-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.945595</td>\n",
       "      <td>0.015965</td>\n",
       "      <td>0.946±0.016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.923978</td>\n",
       "      <td>0.012531</td>\n",
       "      <td>0.924±0.013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Linear-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.754934</td>\n",
       "      <td>0.020960</td>\n",
       "      <td>0.755±0.021</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.791116</td>\n",
       "      <td>0.018720</td>\n",
       "      <td>0.791±0.019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Linear-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.833288</td>\n",
       "      <td>0.017074</td>\n",
       "      <td>0.833±0.017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.777429</td>\n",
       "      <td>0.016781</td>\n",
       "      <td>0.777±0.017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">MC-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.846125</td>\n",
       "      <td>0.015906</td>\n",
       "      <td>0.846±0.016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.892932</td>\n",
       "      <td>0.013358</td>\n",
       "      <td>0.893±0.013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">MC-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.937348</td>\n",
       "      <td>0.012501</td>\n",
       "      <td>0.937±0.013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.941211</td>\n",
       "      <td>0.010973</td>\n",
       "      <td>0.941±0.011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Normal-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.754934</td>\n",
       "      <td>0.020960</td>\n",
       "      <td>0.755±0.021</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.791116</td>\n",
       "      <td>0.018720</td>\n",
       "      <td>0.791±0.019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Upper-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.856002</td>\n",
       "      <td>0.015854</td>\n",
       "      <td>0.856±0.016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.742327</td>\n",
       "      <td>0.046986</td>\n",
       "      <td>0.742±0.047</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Upper-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.945612</td>\n",
       "      <td>0.015991</td>\n",
       "      <td>0.946±0.016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.704681</td>\n",
       "      <td>0.091240</td>\n",
       "      <td>0.705±0.091</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"18\" valign=\"top\">Kannada</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">Independent-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.795410</td>\n",
       "      <td>0.020614</td>\n",
       "      <td>0.795±0.021</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.871186</td>\n",
       "      <td>0.020385</td>\n",
       "      <td>0.871±0.020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Independent-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.915711</td>\n",
       "      <td>0.020513</td>\n",
       "      <td>0.916±0.021</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.904965</td>\n",
       "      <td>0.018738</td>\n",
       "      <td>0.905±0.019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Linear-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.672482</td>\n",
       "      <td>0.023176</td>\n",
       "      <td>0.672±0.023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.725301</td>\n",
       "      <td>0.027018</td>\n",
       "      <td>0.725±0.027</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Linear-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.766196</td>\n",
       "      <td>0.022765</td>\n",
       "      <td>0.766±0.023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.737872</td>\n",
       "      <td>0.024102</td>\n",
       "      <td>0.738±0.024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">MC-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.784925</td>\n",
       "      <td>0.020036</td>\n",
       "      <td>0.785±0.020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.866803</td>\n",
       "      <td>0.017438</td>\n",
       "      <td>0.867±0.017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">MC-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.909194</td>\n",
       "      <td>0.015344</td>\n",
       "      <td>0.909±0.015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.931495</td>\n",
       "      <td>0.012630</td>\n",
       "      <td>0.931±0.013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Normal-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.672482</td>\n",
       "      <td>0.023176</td>\n",
       "      <td>0.672±0.023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.725301</td>\n",
       "      <td>0.027018</td>\n",
       "      <td>0.725±0.027</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Upper-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.795749</td>\n",
       "      <td>0.020481</td>\n",
       "      <td>0.796±0.020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.759617</td>\n",
       "      <td>0.055828</td>\n",
       "      <td>0.760±0.056</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Upper-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.915834</td>\n",
       "      <td>0.020497</td>\n",
       "      <td>0.916±0.020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.766791</td>\n",
       "      <td>0.082418</td>\n",
       "      <td>0.767±0.082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"18\" valign=\"top\">Kuzushiji</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">Independent-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.892334</td>\n",
       "      <td>0.012747</td>\n",
       "      <td>0.892±0.013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.922824</td>\n",
       "      <td>0.015294</td>\n",
       "      <td>0.923±0.015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Independent-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.980926</td>\n",
       "      <td>0.005255</td>\n",
       "      <td>0.981±0.005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.948911</td>\n",
       "      <td>0.011853</td>\n",
       "      <td>0.949±0.012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Linear-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.765723</td>\n",
       "      <td>0.017474</td>\n",
       "      <td>0.766±0.017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.800216</td>\n",
       "      <td>0.025491</td>\n",
       "      <td>0.800±0.025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Linear-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.859731</td>\n",
       "      <td>0.017250</td>\n",
       "      <td>0.860±0.017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.809244</td>\n",
       "      <td>0.022866</td>\n",
       "      <td>0.809±0.023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">MC-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.878677</td>\n",
       "      <td>0.013976</td>\n",
       "      <td>0.879±0.014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.912732</td>\n",
       "      <td>0.014254</td>\n",
       "      <td>0.913±0.014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">MC-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.971368</td>\n",
       "      <td>0.005870</td>\n",
       "      <td>0.971±0.006</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.963590</td>\n",
       "      <td>0.007863</td>\n",
       "      <td>0.964±0.008</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Normal-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.765723</td>\n",
       "      <td>0.017474</td>\n",
       "      <td>0.766±0.017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.800216</td>\n",
       "      <td>0.025491</td>\n",
       "      <td>0.800±0.025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Upper-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.892808</td>\n",
       "      <td>0.012629</td>\n",
       "      <td>0.893±0.013</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.704285</td>\n",
       "      <td>0.042896</td>\n",
       "      <td>0.704±0.043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Upper-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.980998</td>\n",
       "      <td>0.005265</td>\n",
       "      <td>0.981±0.005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.729624</td>\n",
       "      <td>0.075041</td>\n",
       "      <td>0.730±0.075</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"18\" valign=\"top\">MNIST</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">Independent-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.805911</td>\n",
       "      <td>0.025049</td>\n",
       "      <td>0.806±0.025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.872827</td>\n",
       "      <td>0.020456</td>\n",
       "      <td>0.873±0.020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Independent-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.923026</td>\n",
       "      <td>0.026492</td>\n",
       "      <td>0.923±0.026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.909389</td>\n",
       "      <td>0.018334</td>\n",
       "      <td>0.909±0.018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Linear-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.682380</td>\n",
       "      <td>0.028018</td>\n",
       "      <td>0.682±0.028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.717624</td>\n",
       "      <td>0.026323</td>\n",
       "      <td>0.718±0.026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Linear-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.775485</td>\n",
       "      <td>0.028166</td>\n",
       "      <td>0.775±0.028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.727608</td>\n",
       "      <td>0.025371</td>\n",
       "      <td>0.728±0.025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">MC-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.795690</td>\n",
       "      <td>0.023867</td>\n",
       "      <td>0.796±0.024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.865677</td>\n",
       "      <td>0.018719</td>\n",
       "      <td>0.866±0.019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">MC-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.916371</td>\n",
       "      <td>0.021639</td>\n",
       "      <td>0.916±0.022</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.934487</td>\n",
       "      <td>0.013514</td>\n",
       "      <td>0.934±0.014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Normal-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.682380</td>\n",
       "      <td>0.028018</td>\n",
       "      <td>0.682±0.028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.717624</td>\n",
       "      <td>0.026323</td>\n",
       "      <td>0.718±0.026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Upper-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.806168</td>\n",
       "      <td>0.025038</td>\n",
       "      <td>0.806±0.025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.766797</td>\n",
       "      <td>0.053505</td>\n",
       "      <td>0.767±0.054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Upper-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.923044</td>\n",
       "      <td>0.026493</td>\n",
       "      <td>0.923±0.026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.731785</td>\n",
       "      <td>0.090464</td>\n",
       "      <td>0.732±0.090</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                      auc  \\\n",
       "                                                                                     mean   \n",
       "train_domain test_domain  prediction_type           bayes_model last_activation             \n",
       "Fashion      EMNIST-MNIST Independent-mode_entropy  MNIST-CNN   sigmoid          0.855732   \n",
       "                                                                softmax          0.899389   \n",
       "                          Independent-mode_mean-std MNIST-CNN   sigmoid          0.945595   \n",
       "                                                                softmax          0.923978   \n",
       "                          Linear-mode_entropy       MNIST-CNN   sigmoid          0.754934   \n",
       "                                                                softmax          0.791116   \n",
       "                          Linear-mode_mean-std      MNIST-CNN   sigmoid          0.833288   \n",
       "                                                                softmax          0.777429   \n",
       "                          MC-mode_entropy           MNIST-CNN   sigmoid          0.846125   \n",
       "                                                                softmax          0.892932   \n",
       "                          MC-mode_mean-std          MNIST-CNN   sigmoid          0.937348   \n",
       "                                                                softmax          0.941211   \n",
       "                          Normal-mode_entropy       MNIST-CNN   sigmoid          0.754934   \n",
       "                                                                softmax          0.791116   \n",
       "                          Upper-mode_entropy        MNIST-CNN   sigmoid          0.856002   \n",
       "                                                                softmax          0.742327   \n",
       "                          Upper-mode_mean-std       MNIST-CNN   sigmoid          0.945612   \n",
       "                                                                softmax          0.704681   \n",
       "             Kannada      Independent-mode_entropy  MNIST-CNN   sigmoid          0.795410   \n",
       "                                                                softmax          0.871186   \n",
       "                          Independent-mode_mean-std MNIST-CNN   sigmoid          0.915711   \n",
       "                                                                softmax          0.904965   \n",
       "                          Linear-mode_entropy       MNIST-CNN   sigmoid          0.672482   \n",
       "                                                                softmax          0.725301   \n",
       "                          Linear-mode_mean-std      MNIST-CNN   sigmoid          0.766196   \n",
       "                                                                softmax          0.737872   \n",
       "                          MC-mode_entropy           MNIST-CNN   sigmoid          0.784925   \n",
       "                                                                softmax          0.866803   \n",
       "                          MC-mode_mean-std          MNIST-CNN   sigmoid          0.909194   \n",
       "                                                                softmax          0.931495   \n",
       "                          Normal-mode_entropy       MNIST-CNN   sigmoid          0.672482   \n",
       "                                                                softmax          0.725301   \n",
       "                          Upper-mode_entropy        MNIST-CNN   sigmoid          0.795749   \n",
       "                                                                softmax          0.759617   \n",
       "                          Upper-mode_mean-std       MNIST-CNN   sigmoid          0.915834   \n",
       "                                                                softmax          0.766791   \n",
       "             Kuzushiji    Independent-mode_entropy  MNIST-CNN   sigmoid          0.892334   \n",
       "                                                                softmax          0.922824   \n",
       "                          Independent-mode_mean-std MNIST-CNN   sigmoid          0.980926   \n",
       "                                                                softmax          0.948911   \n",
       "                          Linear-mode_entropy       MNIST-CNN   sigmoid          0.765723   \n",
       "                                                                softmax          0.800216   \n",
       "                          Linear-mode_mean-std      MNIST-CNN   sigmoid          0.859731   \n",
       "                                                                softmax          0.809244   \n",
       "                          MC-mode_entropy           MNIST-CNN   sigmoid          0.878677   \n",
       "                                                                softmax          0.912732   \n",
       "                          MC-mode_mean-std          MNIST-CNN   sigmoid          0.971368   \n",
       "                                                                softmax          0.963590   \n",
       "                          Normal-mode_entropy       MNIST-CNN   sigmoid          0.765723   \n",
       "                                                                softmax          0.800216   \n",
       "                          Upper-mode_entropy        MNIST-CNN   sigmoid          0.892808   \n",
       "                                                                softmax          0.704285   \n",
       "                          Upper-mode_mean-std       MNIST-CNN   sigmoid          0.980998   \n",
       "                                                                softmax          0.729624   \n",
       "             MNIST        Independent-mode_entropy  MNIST-CNN   sigmoid          0.805911   \n",
       "                                                                softmax          0.872827   \n",
       "                          Independent-mode_mean-std MNIST-CNN   sigmoid          0.923026   \n",
       "                                                                softmax          0.909389   \n",
       "                          Linear-mode_entropy       MNIST-CNN   sigmoid          0.682380   \n",
       "                                                                softmax          0.717624   \n",
       "                          Linear-mode_mean-std      MNIST-CNN   sigmoid          0.775485   \n",
       "                                                                softmax          0.727608   \n",
       "                          MC-mode_entropy           MNIST-CNN   sigmoid          0.795690   \n",
       "                                                                softmax          0.865677   \n",
       "                          MC-mode_mean-std          MNIST-CNN   sigmoid          0.916371   \n",
       "                                                                softmax          0.934487   \n",
       "                          Normal-mode_entropy       MNIST-CNN   sigmoid          0.682380   \n",
       "                                                                softmax          0.717624   \n",
       "                          Upper-mode_entropy        MNIST-CNN   sigmoid          0.806168   \n",
       "                                                                softmax          0.766797   \n",
       "                          Upper-mode_mean-std       MNIST-CNN   sigmoid          0.923044   \n",
       "                                                                softmax          0.731785   \n",
       "\n",
       "                                                                                           \\\n",
       "                                                                                      std   \n",
       "train_domain test_domain  prediction_type           bayes_model last_activation             \n",
       "Fashion      EMNIST-MNIST Independent-mode_entropy  MNIST-CNN   sigmoid          0.015856   \n",
       "                                                                softmax          0.013707   \n",
       "                          Independent-mode_mean-std MNIST-CNN   sigmoid          0.015965   \n",
       "                                                                softmax          0.012531   \n",
       "                          Linear-mode_entropy       MNIST-CNN   sigmoid          0.020960   \n",
       "                                                                softmax          0.018720   \n",
       "                          Linear-mode_mean-std      MNIST-CNN   sigmoid          0.017074   \n",
       "                                                                softmax          0.016781   \n",
       "                          MC-mode_entropy           MNIST-CNN   sigmoid          0.015906   \n",
       "                                                                softmax          0.013358   \n",
       "                          MC-mode_mean-std          MNIST-CNN   sigmoid          0.012501   \n",
       "                                                                softmax          0.010973   \n",
       "                          Normal-mode_entropy       MNIST-CNN   sigmoid          0.020960   \n",
       "                                                                softmax          0.018720   \n",
       "                          Upper-mode_entropy        MNIST-CNN   sigmoid          0.015854   \n",
       "                                                                softmax          0.046986   \n",
       "                          Upper-mode_mean-std       MNIST-CNN   sigmoid          0.015991   \n",
       "                                                                softmax          0.091240   \n",
       "             Kannada      Independent-mode_entropy  MNIST-CNN   sigmoid          0.020614   \n",
       "                                                                softmax          0.020385   \n",
       "                          Independent-mode_mean-std MNIST-CNN   sigmoid          0.020513   \n",
       "                                                                softmax          0.018738   \n",
       "                          Linear-mode_entropy       MNIST-CNN   sigmoid          0.023176   \n",
       "                                                                softmax          0.027018   \n",
       "                          Linear-mode_mean-std      MNIST-CNN   sigmoid          0.022765   \n",
       "                                                                softmax          0.024102   \n",
       "                          MC-mode_entropy           MNIST-CNN   sigmoid          0.020036   \n",
       "                                                                softmax          0.017438   \n",
       "                          MC-mode_mean-std          MNIST-CNN   sigmoid          0.015344   \n",
       "                                                                softmax          0.012630   \n",
       "                          Normal-mode_entropy       MNIST-CNN   sigmoid          0.023176   \n",
       "                                                                softmax          0.027018   \n",
       "                          Upper-mode_entropy        MNIST-CNN   sigmoid          0.020481   \n",
       "                                                                softmax          0.055828   \n",
       "                          Upper-mode_mean-std       MNIST-CNN   sigmoid          0.020497   \n",
       "                                                                softmax          0.082418   \n",
       "             Kuzushiji    Independent-mode_entropy  MNIST-CNN   sigmoid          0.012747   \n",
       "                                                                softmax          0.015294   \n",
       "                          Independent-mode_mean-std MNIST-CNN   sigmoid          0.005255   \n",
       "                                                                softmax          0.011853   \n",
       "                          Linear-mode_entropy       MNIST-CNN   sigmoid          0.017474   \n",
       "                                                                softmax          0.025491   \n",
       "                          Linear-mode_mean-std      MNIST-CNN   sigmoid          0.017250   \n",
       "                                                                softmax          0.022866   \n",
       "                          MC-mode_entropy           MNIST-CNN   sigmoid          0.013976   \n",
       "                                                                softmax          0.014254   \n",
       "                          MC-mode_mean-std          MNIST-CNN   sigmoid          0.005870   \n",
       "                                                                softmax          0.007863   \n",
       "                          Normal-mode_entropy       MNIST-CNN   sigmoid          0.017474   \n",
       "                                                                softmax          0.025491   \n",
       "                          Upper-mode_entropy        MNIST-CNN   sigmoid          0.012629   \n",
       "                                                                softmax          0.042896   \n",
       "                          Upper-mode_mean-std       MNIST-CNN   sigmoid          0.005265   \n",
       "                                                                softmax          0.075041   \n",
       "             MNIST        Independent-mode_entropy  MNIST-CNN   sigmoid          0.025049   \n",
       "                                                                softmax          0.020456   \n",
       "                          Independent-mode_mean-std MNIST-CNN   sigmoid          0.026492   \n",
       "                                                                softmax          0.018334   \n",
       "                          Linear-mode_entropy       MNIST-CNN   sigmoid          0.028018   \n",
       "                                                                softmax          0.026323   \n",
       "                          Linear-mode_mean-std      MNIST-CNN   sigmoid          0.028166   \n",
       "                                                                softmax          0.025371   \n",
       "                          MC-mode_entropy           MNIST-CNN   sigmoid          0.023867   \n",
       "                                                                softmax          0.018719   \n",
       "                          MC-mode_mean-std          MNIST-CNN   sigmoid          0.021639   \n",
       "                                                                softmax          0.013514   \n",
       "                          Normal-mode_entropy       MNIST-CNN   sigmoid          0.028018   \n",
       "                                                                softmax          0.026323   \n",
       "                          Upper-mode_entropy        MNIST-CNN   sigmoid          0.025038   \n",
       "                                                                softmax          0.053505   \n",
       "                          Upper-mode_mean-std       MNIST-CNN   sigmoid          0.026493   \n",
       "                                                                softmax          0.090464   \n",
       "\n",
       "                                                                                     auc_str  \n",
       "                                                                                              \n",
       "train_domain test_domain  prediction_type           bayes_model last_activation               \n",
       "Fashion      EMNIST-MNIST Independent-mode_entropy  MNIST-CNN   sigmoid          0.856±0.016  \n",
       "                                                                softmax          0.899±0.014  \n",
       "                          Independent-mode_mean-std MNIST-CNN   sigmoid          0.946±0.016  \n",
       "                                                                softmax          0.924±0.013  \n",
       "                          Linear-mode_entropy       MNIST-CNN   sigmoid          0.755±0.021  \n",
       "                                                                softmax          0.791±0.019  \n",
       "                          Linear-mode_mean-std      MNIST-CNN   sigmoid          0.833±0.017  \n",
       "                                                                softmax          0.777±0.017  \n",
       "                          MC-mode_entropy           MNIST-CNN   sigmoid          0.846±0.016  \n",
       "                                                                softmax          0.893±0.013  \n",
       "                          MC-mode_mean-std          MNIST-CNN   sigmoid          0.937±0.013  \n",
       "                                                                softmax          0.941±0.011  \n",
       "                          Normal-mode_entropy       MNIST-CNN   sigmoid          0.755±0.021  \n",
       "                                                                softmax          0.791±0.019  \n",
       "                          Upper-mode_entropy        MNIST-CNN   sigmoid          0.856±0.016  \n",
       "                                                                softmax          0.742±0.047  \n",
       "                          Upper-mode_mean-std       MNIST-CNN   sigmoid          0.946±0.016  \n",
       "                                                                softmax          0.705±0.091  \n",
       "             Kannada      Independent-mode_entropy  MNIST-CNN   sigmoid          0.795±0.021  \n",
       "                                                                softmax          0.871±0.020  \n",
       "                          Independent-mode_mean-std MNIST-CNN   sigmoid          0.916±0.021  \n",
       "                                                                softmax          0.905±0.019  \n",
       "                          Linear-mode_entropy       MNIST-CNN   sigmoid          0.672±0.023  \n",
       "                                                                softmax          0.725±0.027  \n",
       "                          Linear-mode_mean-std      MNIST-CNN   sigmoid          0.766±0.023  \n",
       "                                                                softmax          0.738±0.024  \n",
       "                          MC-mode_entropy           MNIST-CNN   sigmoid          0.785±0.020  \n",
       "                                                                softmax          0.867±0.017  \n",
       "                          MC-mode_mean-std          MNIST-CNN   sigmoid          0.909±0.015  \n",
       "                                                                softmax          0.931±0.013  \n",
       "                          Normal-mode_entropy       MNIST-CNN   sigmoid          0.672±0.023  \n",
       "                                                                softmax          0.725±0.027  \n",
       "                          Upper-mode_entropy        MNIST-CNN   sigmoid          0.796±0.020  \n",
       "                                                                softmax          0.760±0.056  \n",
       "                          Upper-mode_mean-std       MNIST-CNN   sigmoid          0.916±0.020  \n",
       "                                                                softmax          0.767±0.082  \n",
       "             Kuzushiji    Independent-mode_entropy  MNIST-CNN   sigmoid          0.892±0.013  \n",
       "                                                                softmax          0.923±0.015  \n",
       "                          Independent-mode_mean-std MNIST-CNN   sigmoid          0.981±0.005  \n",
       "                                                                softmax          0.949±0.012  \n",
       "                          Linear-mode_entropy       MNIST-CNN   sigmoid          0.766±0.017  \n",
       "                                                                softmax          0.800±0.025  \n",
       "                          Linear-mode_mean-std      MNIST-CNN   sigmoid          0.860±0.017  \n",
       "                                                                softmax          0.809±0.023  \n",
       "                          MC-mode_entropy           MNIST-CNN   sigmoid          0.879±0.014  \n",
       "                                                                softmax          0.913±0.014  \n",
       "                          MC-mode_mean-std          MNIST-CNN   sigmoid          0.971±0.006  \n",
       "                                                                softmax          0.964±0.008  \n",
       "                          Normal-mode_entropy       MNIST-CNN   sigmoid          0.766±0.017  \n",
       "                                                                softmax          0.800±0.025  \n",
       "                          Upper-mode_entropy        MNIST-CNN   sigmoid          0.893±0.013  \n",
       "                                                                softmax          0.704±0.043  \n",
       "                          Upper-mode_mean-std       MNIST-CNN   sigmoid          0.981±0.005  \n",
       "                                                                softmax          0.730±0.075  \n",
       "             MNIST        Independent-mode_entropy  MNIST-CNN   sigmoid          0.806±0.025  \n",
       "                                                                softmax          0.873±0.020  \n",
       "                          Independent-mode_mean-std MNIST-CNN   sigmoid          0.923±0.026  \n",
       "                                                                softmax          0.909±0.018  \n",
       "                          Linear-mode_entropy       MNIST-CNN   sigmoid          0.682±0.028  \n",
       "                                                                softmax          0.718±0.026  \n",
       "                          Linear-mode_mean-std      MNIST-CNN   sigmoid          0.775±0.028  \n",
       "                                                                softmax          0.728±0.025  \n",
       "                          MC-mode_entropy           MNIST-CNN   sigmoid          0.796±0.024  \n",
       "                                                                softmax          0.866±0.019  \n",
       "                          MC-mode_mean-std          MNIST-CNN   sigmoid          0.916±0.022  \n",
       "                                                                softmax          0.934±0.014  \n",
       "                          Normal-mode_entropy       MNIST-CNN   sigmoid          0.682±0.028  \n",
       "                                                                softmax          0.718±0.026  \n",
       "                          Upper-mode_entropy        MNIST-CNN   sigmoid          0.806±0.025  \n",
       "                                                                softmax          0.767±0.054  \n",
       "                          Upper-mode_mean-std       MNIST-CNN   sigmoid          0.923±0.026  \n",
       "                                                                softmax          0.732±0.090  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option(\"display.max_rows\", None)\n",
    "\n",
    "auc_mean_df = auc_df.groupby(['train_domain', 'test_domain', 'prediction_type', 'bayes_model', 'last_activation']).agg({'auc': ['mean', 'std']})\n",
    "auc_mean_df['auc_str'] = auc_mean_df.apply(lambda x: '{0:.3f}±{1:.3f}'.format(x['auc']['mean'], x['auc']['std']), axis=1)\n",
    "auc_mean_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "accuracy_dfs = []\n",
    "for csv_filepath in sorted(glob.glob('score/*.csv')):\n",
    "    df = pd.read_csv(csv_filepath)\n",
    "    df = df[df['train_domain'] == df['test_domain']]\n",
    "    df['correct'] = df['y_test'] == df['y_pred']\n",
    "    accuracy_df = df[['train_domain', 'prediction_type', 'bayes_model', 'last_activation', 'correct']].groupby(['train_domain', 'prediction_type', 'bayes_model', 'last_activation']).mean().reset_index()\n",
    "    accuracy_df = accuracy_df.rename(columns={'correct': 'accuracy'})\n",
    "    accuracy_df['seed'] = df['seed'].iloc[0]\n",
    "    accuracy_dfs.append(accuracy_df)\n",
    "accuracy_df = pd.concat(accuracy_dfs, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">accuracy</th>\n",
       "      <th>accuracy_str</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>train_domain</th>\n",
       "      <th>prediction_type</th>\n",
       "      <th>bayes_model</th>\n",
       "      <th>last_activation</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"18\" valign=\"top\">Fashion</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">Independent-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.923047</td>\n",
       "      <td>0.002346</td>\n",
       "      <td>0.923±0.002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.923003</td>\n",
       "      <td>0.002630</td>\n",
       "      <td>0.923±0.003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Independent-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.923047</td>\n",
       "      <td>0.002346</td>\n",
       "      <td>0.923±0.002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.923003</td>\n",
       "      <td>0.002630</td>\n",
       "      <td>0.923±0.003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Linear-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.923080</td>\n",
       "      <td>0.002315</td>\n",
       "      <td>0.923±0.002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.922923</td>\n",
       "      <td>0.002527</td>\n",
       "      <td>0.923±0.003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Linear-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.923080</td>\n",
       "      <td>0.002315</td>\n",
       "      <td>0.923±0.002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.922923</td>\n",
       "      <td>0.002527</td>\n",
       "      <td>0.923±0.003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">MC-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.923020</td>\n",
       "      <td>0.002347</td>\n",
       "      <td>0.923±0.002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.922943</td>\n",
       "      <td>0.002649</td>\n",
       "      <td>0.923±0.003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">MC-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.923020</td>\n",
       "      <td>0.002347</td>\n",
       "      <td>0.923±0.002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.922943</td>\n",
       "      <td>0.002649</td>\n",
       "      <td>0.923±0.003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Normal-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.923080</td>\n",
       "      <td>0.002315</td>\n",
       "      <td>0.923±0.002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.922923</td>\n",
       "      <td>0.002527</td>\n",
       "      <td>0.923±0.003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Upper-mode_entropy</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.923010</td>\n",
       "      <td>0.002355</td>\n",
       "      <td>0.923±0.002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.663260</td>\n",
       "      <td>0.083609</td>\n",
       "      <td>0.663±0.084</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Upper-mode_mean-std</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">MNIST-CNN</th>\n",
       "      <th>sigmoid</th>\n",
       "      <td>0.923010</td>\n",
       "      <td>0.002355</td>\n",
       "      <td>0.923±0.002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>softmax</th>\n",
       "      <td>0.663260</td>\n",
       "      <td>0.083609</td>\n",
       "      <td>0.663±0.084</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                    accuracy  \\\n",
       "                                                                        mean   \n",
       "train_domain prediction_type           bayes_model last_activation             \n",
       "Fashion      Independent-mode_entropy  MNIST-CNN   sigmoid          0.923047   \n",
       "                                                   softmax          0.923003   \n",
       "             Independent-mode_mean-std MNIST-CNN   sigmoid          0.923047   \n",
       "                                                   softmax          0.923003   \n",
       "             Linear-mode_entropy       MNIST-CNN   sigmoid          0.923080   \n",
       "                                                   softmax          0.922923   \n",
       "             Linear-mode_mean-std      MNIST-CNN   sigmoid          0.923080   \n",
       "                                                   softmax          0.922923   \n",
       "             MC-mode_entropy           MNIST-CNN   sigmoid          0.923020   \n",
       "                                                   softmax          0.922943   \n",
       "             MC-mode_mean-std          MNIST-CNN   sigmoid          0.923020   \n",
       "                                                   softmax          0.922943   \n",
       "             Normal-mode_entropy       MNIST-CNN   sigmoid          0.923080   \n",
       "                                                   softmax          0.922923   \n",
       "             Upper-mode_entropy        MNIST-CNN   sigmoid          0.923010   \n",
       "                                                   softmax          0.663260   \n",
       "             Upper-mode_mean-std       MNIST-CNN   sigmoid          0.923010   \n",
       "                                                   softmax          0.663260   \n",
       "\n",
       "                                                                              \\\n",
       "                                                                         std   \n",
       "train_domain prediction_type           bayes_model last_activation             \n",
       "Fashion      Independent-mode_entropy  MNIST-CNN   sigmoid          0.002346   \n",
       "                                                   softmax          0.002630   \n",
       "             Independent-mode_mean-std MNIST-CNN   sigmoid          0.002346   \n",
       "                                                   softmax          0.002630   \n",
       "             Linear-mode_entropy       MNIST-CNN   sigmoid          0.002315   \n",
       "                                                   softmax          0.002527   \n",
       "             Linear-mode_mean-std      MNIST-CNN   sigmoid          0.002315   \n",
       "                                                   softmax          0.002527   \n",
       "             MC-mode_entropy           MNIST-CNN   sigmoid          0.002347   \n",
       "                                                   softmax          0.002649   \n",
       "             MC-mode_mean-std          MNIST-CNN   sigmoid          0.002347   \n",
       "                                                   softmax          0.002649   \n",
       "             Normal-mode_entropy       MNIST-CNN   sigmoid          0.002315   \n",
       "                                                   softmax          0.002527   \n",
       "             Upper-mode_entropy        MNIST-CNN   sigmoid          0.002355   \n",
       "                                                   softmax          0.083609   \n",
       "             Upper-mode_mean-std       MNIST-CNN   sigmoid          0.002355   \n",
       "                                                   softmax          0.083609   \n",
       "\n",
       "                                                                   accuracy_str  \n",
       "                                                                                 \n",
       "train_domain prediction_type           bayes_model last_activation               \n",
       "Fashion      Independent-mode_entropy  MNIST-CNN   sigmoid          0.923±0.002  \n",
       "                                                   softmax          0.923±0.003  \n",
       "             Independent-mode_mean-std MNIST-CNN   sigmoid          0.923±0.002  \n",
       "                                                   softmax          0.923±0.003  \n",
       "             Linear-mode_entropy       MNIST-CNN   sigmoid          0.923±0.002  \n",
       "                                                   softmax          0.923±0.003  \n",
       "             Linear-mode_mean-std      MNIST-CNN   sigmoid          0.923±0.002  \n",
       "                                                   softmax          0.923±0.003  \n",
       "             MC-mode_entropy           MNIST-CNN   sigmoid          0.923±0.002  \n",
       "                                                   softmax          0.923±0.003  \n",
       "             MC-mode_mean-std          MNIST-CNN   sigmoid          0.923±0.002  \n",
       "                                                   softmax          0.923±0.003  \n",
       "             Normal-mode_entropy       MNIST-CNN   sigmoid          0.923±0.002  \n",
       "                                                   softmax          0.923±0.003  \n",
       "             Upper-mode_entropy        MNIST-CNN   sigmoid          0.923±0.002  \n",
       "                                                   softmax          0.663±0.084  \n",
       "             Upper-mode_mean-std       MNIST-CNN   sigmoid          0.923±0.002  \n",
       "                                                   softmax          0.663±0.084  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean and standard deviation of accuracy for each\n",
    "# (train_domain, prediction_type, bayes_model, last_activation) group.\n",
    "group_keys = ['train_domain', 'prediction_type', 'bayes_model', 'last_activation']\n",
    "accuracy_mean_df = accuracy_df.groupby(group_keys).agg({'accuracy': ['mean', 'std']})\n",
    "\n",
    "\n",
    "def format_mean_std(row):\n",
    "    \"\"\"Render one aggregated row as 'mean±std' with three decimal places.\"\"\"\n",
    "    stats = row['accuracy']  # sub-series holding the 'mean' and 'std' entries\n",
    "    return '{0:.3f}±{1:.3f}'.format(stats['mean'], stats['std'])\n",
    "\n",
    "\n",
    "# Add a display-friendly string column next to the numeric mean/std columns.\n",
    "accuracy_mean_df['accuracy_str'] = accuracy_mean_df.apply(format_mean_std, axis=1)\n",
    "accuracy_mean_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
