{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# **Material_2**"
      ],
      "metadata": {
        "id": "3NsXY65qICyf"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## **dataset: MNIST**"
      ],
      "metadata": {
        "id": "OOTEP3d4IHz6"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "GrrcQEZiIia0",
        "outputId": "a85da201-4619-47e5-9ae8-3ea7fde005c7"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Wed May 22 15:12:08 2024       \n",
            "+---------------------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |\n",
            "|-----------------------------------------+----------------------+----------------------+\n",
            "| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |\n",
            "|                                         |                      |               MIG M. |\n",
            "|=========================================+======================+======================|\n",
            "|   0  NVIDIA L4                      Off | 00000000:00:03.0 Off |                    0 |\n",
            "| N/A   46C    P8              12W /  72W |      1MiB / 23034MiB |      0%      Default |\n",
            "|                                         |                      |                  N/A |\n",
            "+-----------------------------------------+----------------------+----------------------+\n",
            "                                                                                         \n",
            "+---------------------------------------------------------------------------------------+\n",
            "| Processes:                                                                            |\n",
            "|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |\n",
            "|        ID   ID                                                             Usage      |\n",
            "|=======================================================================================|\n",
            "|  No running processes found                                                           |\n",
            "+---------------------------------------------------------------------------------------+\n"
          ]
        }
      ],
      "source": [
        "! nvidia-smi"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fAXmlRf5Iujm",
        "outputId": "4155af42-9d10-4066-b937-2d55364039a4"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Cloning into 'colorized-MNIST'...\n",
            "remote: Enumerating objects: 18863, done.\u001b[K\n",
            "remote: Counting objects: 100% (18863/18863), done.\u001b[K\n",
            "remote: Compressing objects: 100% (18862/18862), done.\u001b[K\n",
            "remote: Total 18863 (delta 2), reused 18858 (delta 1), pack-reused 0\u001b[K\n",
            "Receiving objects: 100% (18863/18863), 5.29 MiB | 6.11 MiB/s, done.\n",
            "Resolving deltas: 100% (2/2), done.\n",
            "Updating files: 100% (18838/18838), done.\n",
            "/content/colorized-MNIST\n"
          ]
        }
      ],
      "source": [
        "!git clone https://github.com/jayaneetha/colorized-MNIST.git\n",
        "%cd colorized-MNIST"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "7OT4WHjBIw29",
        "outputId": "68e049cf-34a4-4e9d-900a-031a62aa3d05"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "8830\n",
            "10000\n"
          ]
        }
      ],
      "source": [
        "# Data processing\n",
        "import numpy as np  # Import numpy for numerical operations\n",
        "import torch  # Import main PyTorch library\n",
        "import torchvision  # Import torchvision for datasets and image transformations\n",
        "import torch.nn as nn  # Import neural network module from torch\n",
        "from matplotlib import pyplot as plt  # Import pyplot from matplotlib for plotting\n",
        "from torch.utils.data import DataLoader, Subset  # Import DataLoader and Subset from torch.utils.data\n",
        "import torch.optim as optim  # Import optim module from torch for optimization algorithms\n",
        "import torchvision.transforms as transforms  # Import transforms from torchvision for image transformations\n",
        "from torch.utils.data import random_split  # Import random_split from torch.utils.data for splitting datasets\n",
        "from torch.utils.data import DataLoader, ConcatDataset, random_split  # Import DataLoader, ConcatDataset, and random_split from torch.utils.data\n",
        "from torchvision import datasets, transforms  # Import datasets and transforms from torchvision\n",
        "\n",
        "transform = transforms.Compose(\n",
        "    [transforms.ToTensor(),\n",
        "     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])  # Define the transformation to be applied to the images\n",
        "\n",
        "all_set = datasets.ImageFolder(root='/content/colorized-MNIST/training', transform=transform)  # Load training dataset\n",
        "test_dataset = datasets.ImageFolder(root='/content/colorized-MNIST/testing', transform=transform)  # Load test dataset\n",
        "subset_indices = list(range(8830))  # Create a list of indices for the subset\n",
        "traning_set = Subset(all_set, subset_indices)  # Create a subset of the training dataset\n",
        "print(len(traning_set))  # Print the length of the training set\n",
        "print(len(test_dataset))  # Print the length of the test dataset\n",
        "e12_, e34_ = random_split(dataset=traning_set, lengths=[4415, 4415], generator=torch.Generator().manual_seed(0))  # Split the training set into two equal parts"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "pOBq6HLz-LG7"
      },
      "outputs": [],
      "source": [
        "num_classes = 10  # Number of classes\n",
        "samples_per_class = 10  # Number of samples to extract per class"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "F69uGAKJ-u79",
        "outputId": "8b520b07-ebef-450e-8952-3fd24b01131e"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "4315\n",
            "4315\n"
          ]
        }
      ],
      "source": [
        "############################ e12 sample extract\n",
        "extracted_samples_e12 = {class_idx: [] for class_idx in range(10)}  # Initialize a dictionary to store extracted samples\n",
        "for idx, (image, label) in enumerate(e12_):  # Extract a specified number of samples from e12 dataset\n",
        "    if len(extracted_samples_e12[label]) < 10:\n",
        "        extracted_samples_e12[label].append(idx)\n",
        "    if all(len(samples) == 10 for samples in extracted_samples_e12.values()):  # Check if the required number of samples have been extracted\n",
        "        break\n",
        "extracted_samples_flat_e12 = [sample_idx for samples in extracted_samples_e12.values() for sample_idx in samples]  # Flatten the extracted samples into a list\n",
        "ee12_ = torch.utils.data.Subset(e12_, [idx for idx in range(len(e12_)) if idx not in extracted_samples_flat_e12])  # Remove extracted samples from e12\n",
        "print(len(ee12_))  # Print the length of the remaining e12 dataset\n",
        "extracted_subset_e12 = torch.utils.data.Subset(ee12_, extracted_samples_flat_e12)  # Create a subset of extracted samples\n",
        "e12_extracted_loader = torch.utils.data.DataLoader(extracted_subset_e12, batch_size=100, shuffle=False, num_workers=0)  # Create DataLoader for the extracted subset\n",
        "e12_extracted_loader_iter = iter(e12_extracted_loader)  # Create an iterator for the DataLoader\n",
        "e12_extracted_loader_image, e12_extracted_loader_label = next(e12_extracted_loader_iter)  # Get the next batch of images and labels\n",
        "\n",
        "############################ e34 sample extract\n",
        "extracted_samples_e34 = {class_idx: [] for class_idx in range(10)}  # Initialize a dictionary to store extracted samples\n",
        "for idx, (image, label) in enumerate(e34_):  # Extract a specified number of samples from e34 dataset\n",
        "    if len(extracted_samples_e34[label]) < 10:\n",
        "        extracted_samples_e34[label].append(idx)\n",
        "    if all(len(samples) == 10 for samples in extracted_samples_e34.values()):  # Check if the required number of samples have been extracted\n",
        "        break\n",
        "extracted_samples_flat_e34 = [sample_idx for samples in extracted_samples_e34.values() for sample_idx in samples]  # Flatten the extracted samples into a list\n",
        "ee34_ = torch.utils.data.Subset(e34_, [idx for idx in range(len(e34_)) if idx not in extracted_samples_flat_e34])  # Remove extracted samples from e34\n",
        "print(len(ee34_))  # Print the length of the remaining e34 dataset\n",
        "extracted_subset_e34 = torch.utils.data.Subset(ee34_, extracted_samples_flat_e34)  # Create a subset of extracted samples\n",
        "e34_extracted_loader = torch.utils.data.DataLoader(extracted_subset_e34, batch_size=100, shuffle=False, num_workers=0)  # Create DataLoader for the extracted subset\n",
        "e34_extracted_loader_iter = iter(e34_extracted_loader)  # Create an iterator for the DataLoader\n",
        "e34_extracted_loader_image, e34_extracted_loader_label = next(e34_extracted_loader_iter)  # Get the next batch of images and labels"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "D8HkkTNyJJc-",
        "outputId": "b8d1f4ac-8c24-49b2-e817-386527cf0470"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "torch.Size([8830, 3, 28, 28])\n"
          ]
        }
      ],
      "source": [
        "e1234 = ConcatDataset([ee12_, ee34_])  # Concatenate the datasets ee12_ and ee34_\n",
        "e3412 = ConcatDataset([ee34_, ee12_])  # Concatenate the datasets ee34_ and ee12_\n",
        "e1234_loader = torch.utils.data.DataLoader(e1234, batch_size=5, shuffle=True, num_workers=0)  # Create DataLoader for the concatenated dataset e1234\n",
        "e3412_loader = torch.utils.data.DataLoader(e3412, batch_size=5, shuffle=True, num_workers=0)  # Create DataLoader for the concatenated dataset e3412\n",
        "\n",
        "s_test_set = datasets.ImageFolder(root='/content/colorized-MNIST/testing', transform=transform)  # Load the test dataset\n",
        "s_test_loader = torch.utils.data.DataLoader(s_test_set, batch_size=8830, shuffle=False, num_workers=0)  # Create DataLoader for the test dataset\n",
        "s_test_data_iter = iter(s_test_loader)  # Create an iterator for the test DataLoader\n",
        "s_test_image, s_test_label = next(s_test_data_iter)  # Get the next batch of test images and labels\n",
        "print(s_test_image.shape)  # Print the shape of the test images\n",
        "\n",
        "##################\n",
        "s_tra_set = datasets.ImageFolder(root='/content/colorized-MNIST/training', transform=transform)  # Load the training dataset\n",
        "s_tra_loader = torch.utils.data.DataLoader(s_tra_set, batch_size=10000, shuffle=False, num_workers=0)  # Create DataLoader for the training dataset\n",
        "s_tra_data_iter = iter(s_tra_loader)  # Create an iterator for the training DataLoader\n",
        "s_tra_image, s_tra_label = next(s_tra_data_iter)  # Get the next batch of training images and labels\n",
        "\n",
        "##################\n",
        "classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')  # Define the class labels"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "TtIBUz9SSXH2"
      },
      "outputs": [],
      "source": [
        "import random\n",
        "\n",
        "## Sampling ## validation\n",
        "############################ e12_ ###########################\n",
        "class_indices = {class_idx: [] for class_idx in range(10)}  # Initialize dictionary to store class indices\n",
        "for idx, (image, label) in enumerate(ee12_):  # Enumerate through ee12_ dataset\n",
        "    class_indices[label].append(idx)  # Append index to corresponding class\n",
        "sample_indices_12 = []\n",
        "for class_idx, indices in class_indices.items():  # For each class, sample 10 indices\n",
        "    sample_indices_12.extend(random.sample(indices, 10))\n",
        "sample_e12 = torch.utils.data.Subset(ee12_, sample_indices_12)  # Create a subset of ee12_ with sampled indices\n",
        "e12_s_loader = torch.utils.data.DataLoader(sample_e12, batch_size=100, shuffle=False, num_workers=0)  # DataLoader for the sampled subset\n",
        "e12_s_loader_iter = iter(e12_s_loader)  # Create an iterator\n",
        "e12_s_loader_image, e12_s_loader_label = next(e12_s_loader_iter)  # Get a batch of images and labels\n",
        "\n",
        "############################ e34_ ###########################\n",
        "class_indices = {class_idx: [] for class_idx in range(10)}  # Initialize dictionary to store class indices\n",
        "for idx, (image, label) in enumerate(ee34_):  # Enumerate through ee34_ dataset\n",
        "    class_indices[label].append(idx)  # Append index to corresponding class\n",
        "sample_indices_34 = []\n",
        "for class_idx, indices in class_indices.items():  # For each class, sample 10 indices\n",
        "    sample_indices_34.extend(random.sample(indices, 10))\n",
        "sample_e34 = torch.utils.data.Subset(ee34_, sample_indices_34)  # Create a subset of ee34_ with sampled indices\n",
        "e34_s_loader = torch.utils.data.DataLoader(sample_e34, batch_size=100, shuffle=False, num_workers=0)  # DataLoader for the sampled subset\n",
        "e34_s_loader_iter = iter(e34_s_loader)  # Create an iterator\n",
        "e34_s_loader_image, e34_s_loader_label = next(e34_s_loader_iter)  # Get a batch of images and labels"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "cuda = True if torch.cuda.is_available() else False"
      ],
      "metadata": {
        "id": "4Mor5mfRKyc9"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "RgmnTJMHpxVU"
      },
      "source": [
        "### **Network1 : use only linear layers + Generalization Decision Process (GDP)**"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "_ZD4Ug4aNH7Y"
      },
      "outputs": [],
      "source": [
        "import argparse\n",
        "import os\n",
        "import numpy as np\n",
        "import torchvision.transforms as transforms\n",
        "from torchvision.utils import save_image\n",
        "from torch.utils.data import DataLoader\n",
        "from torchvision import datasets\n",
        "from torch.autograd import Variable\n",
        "import torch.nn as nn\n",
        "import torch\n",
        "\n",
        "class LLNet(nn.Module):  # Use only linear layers\n",
        "    def __init__(self):\n",
        "        super(LLNet, self).__init__()\n",
        "\n",
        "        self.fc1 = nn.Linear(3*28*28, 1000)  # First fully connected layer\n",
        "        self.fc2 = nn.Linear(1000, 500)      # Second fully connected layer\n",
        "        self.fc3 = nn.Linear(500, 100)       # Third fully connected layer\n",
        "        self.fc4 = nn.Linear(100, 50)        # Fourth fully connected layer\n",
        "        self.fc5 = nn.Linear(50, 25)         # Fifth fully connected layer\n",
        "        self.fc6 = nn.Linear(25, 20)         # Sixth fully connected layer\n",
        "        self.fc7 = nn.Linear(20, 10)         # Seventh fully connected layer (output layer)\n",
        "\n",
        "    def forward(self, x):\n",
        "        x = x.view(-1, 3*28*28)  # Flatten the input image\n",
        "        x = self.fc1(x)  # Apply first fully connected layer\n",
        "        x = self.fc2(x)  # Apply second fully connected layer\n",
        "        x = self.fc3(x)  # Apply third fully connected layer\n",
        "        x = self.fc4(x)  # Apply fourth fully connected layer\n",
        "        x = self.fc5(x)  # Apply fifth fully connected layer\n",
        "        x = self.fc6(x)  # Apply sixth fully connected layer\n",
        "        x = self.fc7(x)  # Apply seventh fully connected layer (output)\n",
        "        return x\n",
        "\n",
        "loss_function = torch.nn.CrossEntropyLoss()  # Define the loss function\n",
        "net = LLNet()  # Instantiate the network\n",
        "\n",
        "# If a GPU is available, run everything in CUDA mode\n",
        "if torch.cuda.is_available():\n",
        "    net = net.cuda()  # Move the network to GPU\n",
        "    loss_function = loss_function.cuda()  # Move the loss function to GPU\n",
        "\n",
        "optimizer_L = torch.optim.Adam(net.parameters(), lr=0.001)  # Define the optimizer with a learning rate of 0.001"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gK_P0r_OOdaP",
        "outputId": "a557d5da-e6a5-4e5e-f3ca-33f6cc9645b6"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "[1,     5] train_loss: 2.460 train_accuracy: 0.134 test_accuracy: 0.130\n",
            "[1,    10] train_loss: 2.540 train_accuracy: 0.101 test_accuracy: 0.111\n",
            "[1,    15] train_loss: 2.222 train_accuracy: 0.197 test_accuracy: 0.215\n",
            "[1,    20] train_loss: 2.275 train_accuracy: 0.249 test_accuracy: 0.239\n",
            "[1,    25] train_loss: 2.240 train_accuracy: 0.257 test_accuracy: 0.273\n",
            "[1,    30] train_loss: 2.171 train_accuracy: 0.250 test_accuracy: 0.237\n",
            "[1,    35] train_loss: 1.845 train_accuracy: 0.341 test_accuracy: 0.385\n",
            "[1,    40] train_loss: 1.938 train_accuracy: 0.411 test_accuracy: 0.461\n",
            "[1,    45] train_loss: 1.979 train_accuracy: 0.316 test_accuracy: 0.331\n",
            "[1,    50] train_loss: 1.871 train_accuracy: 0.355 test_accuracy: 0.339\n",
            "[1,    55] train_loss: 1.779 train_accuracy: 0.374 test_accuracy: 0.360\n",
            "[1,    60] train_loss: 2.073 train_accuracy: 0.433 test_accuracy: 0.460\n",
            "[1,    65] train_loss: 2.013 train_accuracy: 0.365 test_accuracy: 0.401\n",
            "[1,    70] train_loss: 1.898 train_accuracy: 0.340 test_accuracy: 0.353\n",
            "[1,    75] train_loss: 1.869 train_accuracy: 0.309 test_accuracy: 0.286\n",
            "[1,    80] train_loss: 1.617 train_accuracy: 0.249 test_accuracy: 0.286\n",
            "[1,    85] train_loss: 2.454 train_accuracy: 0.381 test_accuracy: 0.409\n",
            "[1,    90] train_loss: 1.882 train_accuracy: 0.423 test_accuracy: 0.461\n",
            "[1,    95] train_loss: 1.555 train_accuracy: 0.491 test_accuracy: 0.553\n",
            "[1,   100] train_loss: 1.759 train_accuracy: 0.508 test_accuracy: 0.549\n",
            "[1,   105] train_loss: 1.202 train_accuracy: 0.414 test_accuracy: 0.392\n",
            "[1,   110] train_loss: 1.485 train_accuracy: 0.451 test_accuracy: 0.471\n",
            "[1,   115] train_loss: 1.734 train_accuracy: 0.516 test_accuracy: 0.547\n",
            "[1,   120] train_loss: 1.266 train_accuracy: 0.474 test_accuracy: 0.534\n",
            "[1,   125] train_loss: 1.065 train_accuracy: 0.549 test_accuracy: 0.548\n",
            "[1,   130] train_loss: 1.249 train_accuracy: 0.553 test_accuracy: 0.550\n",
            "[1,   135] train_loss: 1.076 train_accuracy: 0.566 test_accuracy: 0.570\n",
            "[1,   140] train_loss: 1.594 train_accuracy: 0.491 test_accuracy: 0.483\n",
            "[1,   145] train_loss: 1.573 train_accuracy: 0.467 test_accuracy: 0.498\n",
            "[1,   150] train_loss: 1.437 train_accuracy: 0.501 test_accuracy: 0.535\n",
            "[1,   155] train_loss: 1.763 train_accuracy: 0.490 test_accuracy: 0.549\n",
            "[1,   160] train_loss: 1.745 train_accuracy: 0.596 test_accuracy: 0.613\n",
            "[1,   165] train_loss: 1.206 train_accuracy: 0.493 test_accuracy: 0.440\n",
            "[1,   170] train_loss: 1.158 train_accuracy: 0.525 test_accuracy: 0.490\n",
            "[1,   175] train_loss: 1.169 train_accuracy: 0.557 test_accuracy: 0.572\n",
            "[1,   180] train_loss: 1.810 train_accuracy: 0.470 test_accuracy: 0.481\n",
            "[1,   185] train_loss: 1.591 train_accuracy: 0.408 test_accuracy: 0.431\n",
            "[1,   190] train_loss: 3.305 train_accuracy: 0.490 test_accuracy: 0.525\n",
            "[1,   195] train_loss: 1.683 train_accuracy: 0.335 test_accuracy: 0.361\n",
            "[1,   200] train_loss: 2.359 train_accuracy: 0.305 test_accuracy: 0.295\n",
            "[1,   205] train_loss: 2.162 train_accuracy: 0.492 test_accuracy: 0.519\n",
            "[1,   210] train_loss: 1.589 train_accuracy: 0.452 test_accuracy: 0.470\n",
            "[1,   215] train_loss: 1.370 train_accuracy: 0.447 test_accuracy: 0.473\n",
            "[1,   220] train_loss: 1.497 train_accuracy: 0.436 test_accuracy: 0.449\n",
            "[1,   225] train_loss: 1.076 train_accuracy: 0.553 test_accuracy: 0.556\n",
            "[1,   230] train_loss: 1.127 train_accuracy: 0.493 test_accuracy: 0.480\n",
            "[1,   235] train_loss: 1.405 train_accuracy: 0.518 test_accuracy: 0.511\n",
            "[1,   240] train_loss: 1.038 train_accuracy: 0.477 test_accuracy: 0.480\n",
            "[1,   245] train_loss: 1.203 train_accuracy: 0.496 test_accuracy: 0.511\n",
            "[1,   250] train_loss: 1.356 train_accuracy: 0.538 test_accuracy: 0.575\n",
            "[1,   255] train_loss: 1.327 train_accuracy: 0.619 test_accuracy: 0.637\n",
            "[1,   260] train_loss: 1.482 train_accuracy: 0.571 test_accuracy: 0.576\n",
            "[1,   265] train_loss: 1.543 train_accuracy: 0.616 test_accuracy: 0.679\n",
            "[1,   270] train_loss: 0.898 train_accuracy: 0.605 test_accuracy: 0.661\n",
            "[1,   275] train_loss: 1.265 train_accuracy: 0.619 test_accuracy: 0.680\n",
            "[1,   280] train_loss: 0.992 train_accuracy: 0.577 test_accuracy: 0.618\n",
            "[1,   285] train_loss: 0.962 train_accuracy: 0.620 test_accuracy: 0.636\n",
            "[1,   290] train_loss: 0.777 train_accuracy: 0.639 test_accuracy: 0.621\n",
            "[1,   295] train_loss: 0.901 train_accuracy: 0.649 test_accuracy: 0.629\n",
            "[1,   300] train_loss: 1.021 train_accuracy: 0.698 test_accuracy: 0.695\n",
            "[1,   305] train_loss: 1.051 train_accuracy: 0.721 test_accuracy: 0.734\n",
            "[1,   310] train_loss: 1.105 train_accuracy: 0.696 test_accuracy: 0.710\n",
            "[1,   315] train_loss: 0.713 train_accuracy: 0.685 test_accuracy: 0.699\n",
            "[1,   320] train_loss: 1.219 train_accuracy: 0.695 test_accuracy: 0.713\n",
            "[1,   325] train_loss: 1.161 train_accuracy: 0.688 test_accuracy: 0.721\n",
            "[1,   330] train_loss: 0.906 train_accuracy: 0.615 test_accuracy: 0.665\n",
            "[1,   335] train_loss: 0.708 train_accuracy: 0.540 test_accuracy: 0.593\n",
            "[1,   340] train_loss: 1.152 train_accuracy: 0.539 test_accuracy: 0.589\n",
            "[1,   345] train_loss: 1.360 train_accuracy: 0.605 test_accuracy: 0.624\n",
            "[1,   350] train_loss: 0.997 train_accuracy: 0.663 test_accuracy: 0.683\n",
            "[1,   355] train_loss: 0.834 train_accuracy: 0.562 test_accuracy: 0.617\n",
            "[1,   360] train_loss: 2.108 train_accuracy: 0.569 test_accuracy: 0.578\n",
            "[1,   365] train_loss: 0.890 train_accuracy: 0.571 test_accuracy: 0.564\n",
            "[1,   370] train_loss: 1.201 train_accuracy: 0.614 test_accuracy: 0.588\n",
            "[1,   375] train_loss: 1.031 train_accuracy: 0.611 test_accuracy: 0.597\n",
            "[1,   380] train_loss: 0.795 train_accuracy: 0.630 test_accuracy: 0.631\n",
            "[1,   385] train_loss: 0.816 train_accuracy: 0.678 test_accuracy: 0.680\n",
            "[1,   390] train_loss: 1.056 train_accuracy: 0.705 test_accuracy: 0.714\n",
            "[1,   395] train_loss: 1.253 train_accuracy: 0.655 test_accuracy: 0.691\n",
            "[1,   400] train_loss: 0.802 train_accuracy: 0.642 test_accuracy: 0.682\n",
            "[1,   405] train_loss: 1.089 train_accuracy: 0.659 test_accuracy: 0.696\n",
            "[1,   410] train_loss: 1.519 train_accuracy: 0.659 test_accuracy: 0.701\n",
            "[1,   415] train_loss: 1.010 train_accuracy: 0.647 test_accuracy: 0.676\n",
            "[1,   420] train_loss: 1.194 train_accuracy: 0.643 test_accuracy: 0.691\n",
            "[1,   425] train_loss: 0.775 train_accuracy: 0.638 test_accuracy: 0.685\n",
            "[1,   430] train_loss: 1.089 train_accuracy: 0.624 test_accuracy: 0.670\n",
            "[1,   435] train_loss: 0.916 train_accuracy: 0.677 test_accuracy: 0.700\n",
            "[1,   440] train_loss: 1.050 train_accuracy: 0.665 test_accuracy: 0.700\n",
            "[1,   445] train_loss: 0.683 train_accuracy: 0.647 test_accuracy: 0.679\n",
            "[1,   450] train_loss: 1.079 train_accuracy: 0.620 test_accuracy: 0.666\n",
            "[1,   455] train_loss: 0.767 train_accuracy: 0.658 test_accuracy: 0.683\n",
            "[1,   460] train_loss: 1.118 train_accuracy: 0.673 test_accuracy: 0.722\n",
            "[1,   465] train_loss: 0.663 train_accuracy: 0.645 test_accuracy: 0.672\n",
            "[1,   470] train_loss: 0.817 train_accuracy: 0.665 test_accuracy: 0.708\n",
            "[1,   475] train_loss: 0.791 train_accuracy: 0.671 test_accuracy: 0.711\n",
            "[1,   480] train_loss: 0.925 train_accuracy: 0.699 test_accuracy: 0.707\n",
            "[1,   485] train_loss: 0.587 train_accuracy: 0.675 test_accuracy: 0.660\n",
            "[1,   490] train_loss: 1.542 train_accuracy: 0.690 test_accuracy: 0.676\n",
            "[1,   495] train_loss: 1.246 train_accuracy: 0.655 test_accuracy: 0.653\n",
            "[1,   500] train_loss: 1.342 train_accuracy: 0.672 test_accuracy: 0.655\n",
            "[1,   505] train_loss: 1.652 train_accuracy: 0.680 test_accuracy: 0.685\n",
            "[1,   510] train_loss: 1.863 train_accuracy: 0.655 test_accuracy: 0.682\n",
            "[1,   515] train_loss: 0.821 train_accuracy: 0.647 test_accuracy: 0.639\n",
            "[1,   520] train_loss: 0.870 train_accuracy: 0.618 test_accuracy: 0.617\n",
            "[1,   525] train_loss: 1.348 train_accuracy: 0.651 test_accuracy: 0.665\n",
            "[1,   530] train_loss: 0.848 train_accuracy: 0.700 test_accuracy: 0.725\n",
            "[1,   535] train_loss: 1.189 train_accuracy: 0.707 test_accuracy: 0.733\n",
            "[1,   540] train_loss: 0.716 train_accuracy: 0.713 test_accuracy: 0.734\n",
            "[1,   545] train_loss: 0.567 train_accuracy: 0.757 test_accuracy: 0.771\n",
            "[1,   550] train_loss: 0.984 train_accuracy: 0.734 test_accuracy: 0.732\n",
            "[1,   555] train_loss: 0.963 train_accuracy: 0.720 test_accuracy: 0.703\n",
            "[1,   560] train_loss: 1.610 train_accuracy: 0.667 test_accuracy: 0.669\n",
            "[1,   565] train_loss: 1.072 train_accuracy: 0.618 test_accuracy: 0.638\n",
            "[1,   570] train_loss: 0.622 train_accuracy: 0.633 test_accuracy: 0.668\n",
            "[1,   575] train_loss: 1.108 train_accuracy: 0.628 test_accuracy: 0.628\n",
            "[1,   580] train_loss: 1.023 train_accuracy: 0.617 test_accuracy: 0.636\n",
            "[1,   585] train_loss: 1.121 train_accuracy: 0.653 test_accuracy: 0.701\n",
            "[1,   590] train_loss: 0.941 train_accuracy: 0.644 test_accuracy: 0.709\n",
            "[1,   595] train_loss: 0.840 train_accuracy: 0.577 test_accuracy: 0.645\n",
            "[1,   600] train_loss: 1.106 train_accuracy: 0.624 test_accuracy: 0.660\n",
            "[1,   605] train_loss: 1.038 train_accuracy: 0.601 test_accuracy: 0.609\n",
            "[1,   610] train_loss: 1.451 train_accuracy: 0.641 test_accuracy: 0.658\n",
            "[1,   615] train_loss: 1.027 train_accuracy: 0.573 test_accuracy: 0.597\n",
            "[1,   620] train_loss: 1.131 train_accuracy: 0.674 test_accuracy: 0.725\n",
            "[1,   625] train_loss: 1.277 train_accuracy: 0.717 test_accuracy: 0.753\n",
            "[1,   630] train_loss: 0.754 train_accuracy: 0.670 test_accuracy: 0.702\n",
            "[1,   635] train_loss: 1.039 train_accuracy: 0.635 test_accuracy: 0.677\n",
            "[1,   640] train_loss: 0.927 train_accuracy: 0.608 test_accuracy: 0.607\n",
            "[1,   645] train_loss: 0.828 train_accuracy: 0.586 test_accuracy: 0.574\n",
            "[1,   650] train_loss: 1.115 train_accuracy: 0.605 test_accuracy: 0.618\n",
            "[1,   655] train_loss: 1.162 train_accuracy: 0.658 test_accuracy: 0.700\n",
            "[1,   660] train_loss: 0.764 train_accuracy: 0.670 test_accuracy: 0.692\n",
            "[1,   665] train_loss: 0.907 train_accuracy: 0.693 test_accuracy: 0.707\n",
            "[1,   670] train_loss: 1.289 train_accuracy: 0.686 test_accuracy: 0.688\n",
            "[1,   675] train_loss: 0.987 train_accuracy: 0.750 test_accuracy: 0.742\n",
            "[1,   680] train_loss: 0.700 train_accuracy: 0.728 test_accuracy: 0.730\n",
            "[1,   685] train_loss: 1.029 train_accuracy: 0.702 test_accuracy: 0.734\n",
            "[1,   690] train_loss: 0.699 train_accuracy: 0.723 test_accuracy: 0.780\n",
            "[1,   695] train_loss: 0.354 train_accuracy: 0.746 test_accuracy: 0.815\n",
            "[1,   700] train_loss: 0.528 train_accuracy: 0.753 test_accuracy: 0.813\n",
            "[1,   705] train_loss: 1.274 train_accuracy: 0.768 test_accuracy: 0.787\n",
            "[1,   710] train_loss: 0.777 train_accuracy: 0.800 test_accuracy: 0.786\n",
            "[1,   715] train_loss: 1.083 train_accuracy: 0.715 test_accuracy: 0.685\n",
            "[1,   720] train_loss: 0.899 train_accuracy: 0.730 test_accuracy: 0.710\n",
            "[1,   725] train_loss: 0.581 train_accuracy: 0.768 test_accuracy: 0.745\n",
            "[1,   730] train_loss: 0.553 train_accuracy: 0.732 test_accuracy: 0.736\n",
            "[1,   735] train_loss: 1.013 train_accuracy: 0.771 test_accuracy: 0.798\n",
            "[1,   740] train_loss: 0.583 train_accuracy: 0.801 test_accuracy: 0.820\n",
            "[1,   745] train_loss: 0.229 train_accuracy: 0.688 test_accuracy: 0.717\n",
            "[1,   750] train_loss: 0.946 train_accuracy: 0.739 test_accuracy: 0.785\n",
            "[1,   755] train_loss: 0.630 train_accuracy: 0.715 test_accuracy: 0.771\n",
            "[1,   760] train_loss: 0.620 train_accuracy: 0.727 test_accuracy: 0.742\n",
            "[1,   765] train_loss: 0.782 train_accuracy: 0.780 test_accuracy: 0.795\n",
            "[1,   770] train_loss: 0.286 train_accuracy: 0.741 test_accuracy: 0.777\n",
            "[1,   775] train_loss: 0.925 train_accuracy: 0.690 test_accuracy: 0.710\n",
            "[1,   780] train_loss: 1.317 train_accuracy: 0.630 test_accuracy: 0.625\n",
            "[1,   785] train_loss: 1.623 train_accuracy: 0.691 test_accuracy: 0.675\n",
            "[1,   790] train_loss: 1.272 train_accuracy: 0.701 test_accuracy: 0.689\n",
            "[1,   795] train_loss: 0.933 train_accuracy: 0.586 test_accuracy: 0.604\n",
            "[1,   800] train_loss: 1.448 train_accuracy: 0.597 test_accuracy: 0.616\n",
            "[1,   805] train_loss: 1.468 train_accuracy: 0.670 test_accuracy: 0.709\n",
            "[1,   810] train_loss: 0.695 train_accuracy: 0.643 test_accuracy: 0.703\n",
            "[1,   815] train_loss: 1.322 train_accuracy: 0.699 test_accuracy: 0.732\n",
            "[1,   820] train_loss: 0.776 train_accuracy: 0.681 test_accuracy: 0.736\n",
            "[1,   825] train_loss: 1.192 train_accuracy: 0.652 test_accuracy: 0.711\n",
            "[1,   830] train_loss: 0.780 train_accuracy: 0.710 test_accuracy: 0.709\n",
            "[1,   835] train_loss: 1.001 train_accuracy: 0.643 test_accuracy: 0.660\n",
            "[1,   840] train_loss: 0.925 train_accuracy: 0.619 test_accuracy: 0.670\n",
            "[1,   845] train_loss: 1.601 train_accuracy: 0.668 test_accuracy: 0.722\n",
            "[1,   850] train_loss: 1.172 train_accuracy: 0.692 test_accuracy: 0.747\n",
            "[1,   855] train_loss: 0.841 train_accuracy: 0.699 test_accuracy: 0.751\n",
            "[1,   860] train_loss: 1.291 train_accuracy: 0.704 test_accuracy: 0.726\n",
            "[1,   865] train_loss: 0.828 train_accuracy: 0.769 test_accuracy: 0.774\n",
            "[1,   870] train_loss: 0.705 train_accuracy: 0.751 test_accuracy: 0.764\n",
            "[1,   875] train_loss: 0.762 train_accuracy: 0.767 test_accuracy: 0.796\n",
            "[1,   880] train_loss: 0.477 train_accuracy: 0.786 test_accuracy: 0.796\n",
            "[1,   885] train_loss: 1.117 train_accuracy: 0.791 test_accuracy: 0.790\n",
            "[1,   890] train_loss: 0.670 train_accuracy: 0.777 test_accuracy: 0.767\n",
            "[1,   895] train_loss: 0.687 train_accuracy: 0.773 test_accuracy: 0.763\n",
            "[1,   900] train_loss: 1.116 train_accuracy: 0.790 test_accuracy: 0.794\n",
            "[1,   905] train_loss: 0.421 train_accuracy: 0.782 test_accuracy: 0.798\n",
            "[1,   910] train_loss: 1.056 train_accuracy: 0.778 test_accuracy: 0.810\n",
            "[1,   915] train_loss: 0.627 train_accuracy: 0.782 test_accuracy: 0.810\n",
            "[1,   920] train_loss: 0.489 train_accuracy: 0.792 test_accuracy: 0.813\n",
            "[1,   925] train_loss: 0.627 train_accuracy: 0.804 test_accuracy: 0.808\n",
            "[1,   930] train_loss: 0.699 train_accuracy: 0.777 test_accuracy: 0.784\n",
            "[1,   935] train_loss: 0.286 train_accuracy: 0.778 test_accuracy: 0.775\n",
            "[1,   940] train_loss: 0.736 train_accuracy: 0.774 test_accuracy: 0.764\n",
            "[1,   945] train_loss: 0.659 train_accuracy: 0.710 test_accuracy: 0.690\n",
            "[1,   950] train_loss: 0.845 train_accuracy: 0.735 test_accuracy: 0.737\n",
            "[1,   955] train_loss: 0.760 train_accuracy: 0.732 test_accuracy: 0.760\n",
            "[1,   960] train_loss: 0.672 train_accuracy: 0.734 test_accuracy: 0.767\n",
            "[1,   965] train_loss: 0.706 train_accuracy: 0.690 test_accuracy: 0.699\n",
            "[1,   970] train_loss: 1.068 train_accuracy: 0.724 test_accuracy: 0.719\n",
            "[1,   975] train_loss: 0.920 train_accuracy: 0.690 test_accuracy: 0.683\n",
            "[1,   980] train_loss: 0.835 train_accuracy: 0.716 test_accuracy: 0.713\n",
            "[1,   985] train_loss: 1.041 train_accuracy: 0.706 test_accuracy: 0.696\n",
            "[1,   990] train_loss: 1.294 train_accuracy: 0.749 test_accuracy: 0.733\n",
            "[1,   995] train_loss: 0.446 train_accuracy: 0.768 test_accuracy: 0.746\n",
            "[1,  1000] train_loss: 0.687 train_accuracy: 0.740 test_accuracy: 0.757\n",
            "[1,  1005] train_loss: 0.809 train_accuracy: 0.709 test_accuracy: 0.726\n",
            "[1,  1010] train_loss: 1.116 train_accuracy: 0.698 test_accuracy: 0.696\n",
            "[1,  1015] train_loss: 1.072 train_accuracy: 0.678 test_accuracy: 0.685\n",
            "[1,  1020] train_loss: 1.753 train_accuracy: 0.706 test_accuracy: 0.728\n",
            "[1,  1025] train_loss: 0.771 train_accuracy: 0.695 test_accuracy: 0.701\n",
            "[1,  1030] train_loss: 0.683 train_accuracy: 0.702 test_accuracy: 0.696\n",
            "[1,  1035] train_loss: 1.026 train_accuracy: 0.701 test_accuracy: 0.706\n",
            "[1,  1040] train_loss: 0.697 train_accuracy: 0.734 test_accuracy: 0.708\n",
            "[1,  1045] train_loss: 0.881 train_accuracy: 0.732 test_accuracy: 0.703\n",
            "[1,  1050] train_loss: 0.430 train_accuracy: 0.772 test_accuracy: 0.755\n",
            "[1,  1055] train_loss: 1.001 train_accuracy: 0.764 test_accuracy: 0.756\n",
            "[1,  1060] train_loss: 0.503 train_accuracy: 0.764 test_accuracy: 0.774\n",
            "[1,  1065] train_loss: 0.593 train_accuracy: 0.780 test_accuracy: 0.811\n",
            "[1,  1070] train_loss: 0.577 train_accuracy: 0.795 test_accuracy: 0.814\n",
            "[1,  1075] train_loss: 0.345 train_accuracy: 0.814 test_accuracy: 0.798\n",
            "[1,  1080] train_loss: 0.295 train_accuracy: 0.769 test_accuracy: 0.757\n",
            "[1,  1085] train_loss: 0.646 train_accuracy: 0.797 test_accuracy: 0.788\n",
            "[1,  1090] train_loss: 0.546 train_accuracy: 0.765 test_accuracy: 0.757\n",
            "[1,  1095] train_loss: 0.451 train_accuracy: 0.711 test_accuracy: 0.701\n",
            "[1,  1100] train_loss: 1.422 train_accuracy: 0.737 test_accuracy: 0.726\n",
            "[1,  1105] train_loss: 0.853 train_accuracy: 0.760 test_accuracy: 0.743\n",
            "[1,  1110] train_loss: 0.689 train_accuracy: 0.765 test_accuracy: 0.757\n",
            "[1,  1115] train_loss: 0.902 train_accuracy: 0.774 test_accuracy: 0.769\n",
            "[1,  1120] train_loss: 0.483 train_accuracy: 0.736 test_accuracy: 0.748\n",
            "[1,  1125] train_loss: 0.533 train_accuracy: 0.702 test_accuracy: 0.707\n",
            "[1,  1130] train_loss: 1.281 train_accuracy: 0.727 test_accuracy: 0.740\n",
            "[1,  1135] train_loss: 0.675 train_accuracy: 0.803 test_accuracy: 0.798\n",
            "[1,  1140] train_loss: 0.922 train_accuracy: 0.817 test_accuracy: 0.797\n",
            "[1,  1145] train_loss: 0.524 train_accuracy: 0.796 test_accuracy: 0.792\n",
            "[1,  1150] train_loss: 0.602 train_accuracy: 0.757 test_accuracy: 0.749\n",
            "[1,  1155] train_loss: 0.458 train_accuracy: 0.769 test_accuracy: 0.745\n",
            "[1,  1160] train_loss: 0.827 train_accuracy: 0.772 test_accuracy: 0.753\n",
            "[1,  1165] train_loss: 0.682 train_accuracy: 0.738 test_accuracy: 0.729\n",
            "[1,  1170] train_loss: 0.566 train_accuracy: 0.727 test_accuracy: 0.742\n",
            "[1,  1175] train_loss: 0.851 train_accuracy: 0.700 test_accuracy: 0.736\n",
            "[1,  1180] train_loss: 0.692 train_accuracy: 0.719 test_accuracy: 0.754\n",
            "[1,  1185] train_loss: 1.016 train_accuracy: 0.736 test_accuracy: 0.741\n",
            "[1,  1190] train_loss: 1.136 train_accuracy: 0.767 test_accuracy: 0.768\n",
            "[1,  1195] train_loss: 0.595 train_accuracy: 0.766 test_accuracy: 0.765\n",
            "[1,  1200] train_loss: 0.497 train_accuracy: 0.766 test_accuracy: 0.750\n",
            "[1,  1205] train_loss: 0.950 train_accuracy: 0.751 test_accuracy: 0.731\n",
            "[1,  1210] train_loss: 0.591 train_accuracy: 0.801 test_accuracy: 0.774\n",
            "[1,  1215] train_loss: 0.498 train_accuracy: 0.790 test_accuracy: 0.797\n",
            "[1,  1220] train_loss: 0.581 train_accuracy: 0.777 test_accuracy: 0.787\n",
            "[1,  1225] train_loss: 0.862 train_accuracy: 0.783 test_accuracy: 0.776\n",
            "[1,  1230] train_loss: 0.588 train_accuracy: 0.784 test_accuracy: 0.775\n",
            "[1,  1235] train_loss: 1.225 train_accuracy: 0.738 test_accuracy: 0.739\n",
            "[1,  1240] train_loss: 0.518 train_accuracy: 0.721 test_accuracy: 0.728\n",
            "[1,  1245] train_loss: 0.770 train_accuracy: 0.720 test_accuracy: 0.705\n",
            "[1,  1250] train_loss: 0.706 train_accuracy: 0.736 test_accuracy: 0.711\n",
            "[1,  1255] train_loss: 1.004 train_accuracy: 0.752 test_accuracy: 0.734\n",
            "[1,  1260] train_loss: 0.866 train_accuracy: 0.771 test_accuracy: 0.746\n",
            "[1,  1265] train_loss: 1.299 train_accuracy: 0.790 test_accuracy: 0.776\n",
            "[1,  1270] train_loss: 0.785 train_accuracy: 0.809 test_accuracy: 0.818\n",
            "[1,  1275] train_loss: 0.526 train_accuracy: 0.788 test_accuracy: 0.799\n",
            "[1,  1280] train_loss: 0.449 train_accuracy: 0.791 test_accuracy: 0.800\n",
            "[1,  1285] train_loss: 0.458 train_accuracy: 0.816 test_accuracy: 0.815\n",
            "[1,  1290] train_loss: 0.430 train_accuracy: 0.800 test_accuracy: 0.796\n",
            "[1,  1295] train_loss: 0.670 train_accuracy: 0.792 test_accuracy: 0.789\n",
            "[1,  1300] train_loss: 0.566 train_accuracy: 0.774 test_accuracy: 0.779\n",
            "[1,  1305] train_loss: 0.315 train_accuracy: 0.780 test_accuracy: 0.789\n",
            "[1,  1310] train_loss: 0.396 train_accuracy: 0.784 test_accuracy: 0.796\n",
            "[1,  1315] train_loss: 0.502 train_accuracy: 0.785 test_accuracy: 0.798\n",
            "[1,  1320] train_loss: 1.140 train_accuracy: 0.774 test_accuracy: 0.770\n",
            "[1,  1325] train_loss: 0.719 train_accuracy: 0.747 test_accuracy: 0.752\n",
            "[1,  1330] train_loss: 1.216 train_accuracy: 0.751 test_accuracy: 0.753\n",
            "[1,  1335] train_loss: 1.159 train_accuracy: 0.765 test_accuracy: 0.747\n",
            "[1,  1340] train_loss: 0.751 train_accuracy: 0.750 test_accuracy: 0.731\n",
            "[1,  1345] train_loss: 0.771 train_accuracy: 0.741 test_accuracy: 0.737\n",
            "[1,  1350] train_loss: 0.757 train_accuracy: 0.743 test_accuracy: 0.738\n",
            "[1,  1355] train_loss: 0.392 train_accuracy: 0.757 test_accuracy: 0.747\n",
            "[1,  1360] train_loss: 0.398 train_accuracy: 0.771 test_accuracy: 0.744\n",
            "[1,  1365] train_loss: 1.503 train_accuracy: 0.739 test_accuracy: 0.719\n",
            "[1,  1370] train_loss: 0.546 train_accuracy: 0.620 test_accuracy: 0.614\n",
            "[1,  1375] train_loss: 0.937 train_accuracy: 0.696 test_accuracy: 0.669\n",
            "[1,  1380] train_loss: 1.001 train_accuracy: 0.741 test_accuracy: 0.702\n",
            "[1,  1385] train_loss: 0.495 train_accuracy: 0.714 test_accuracy: 0.687\n",
            "[1,  1390] train_loss: 0.968 train_accuracy: 0.708 test_accuracy: 0.698\n",
            "[1,  1395] train_loss: 0.925 train_accuracy: 0.766 test_accuracy: 0.771\n",
            "[1,  1400] train_loss: 0.975 train_accuracy: 0.751 test_accuracy: 0.748\n",
            "[1,  1405] train_loss: 0.591 train_accuracy: 0.677 test_accuracy: 0.653\n",
            "[1,  1410] train_loss: 0.866 train_accuracy: 0.744 test_accuracy: 0.718\n",
            "[1,  1415] train_loss: 0.769 train_accuracy: 0.781 test_accuracy: 0.782\n",
            "[1,  1420] train_loss: 0.613 train_accuracy: 0.775 test_accuracy: 0.782\n",
            "[1,  1425] train_loss: 0.796 train_accuracy: 0.738 test_accuracy: 0.732\n",
            "[1,  1430] train_loss: 1.367 train_accuracy: 0.760 test_accuracy: 0.767\n",
            "[1,  1435] train_loss: 0.879 train_accuracy: 0.708 test_accuracy: 0.733\n",
            "[1,  1440] train_loss: 1.289 train_accuracy: 0.763 test_accuracy: 0.777\n",
            "[1,  1445] train_loss: 0.550 train_accuracy: 0.783 test_accuracy: 0.773\n",
            "[1,  1450] train_loss: 0.604 train_accuracy: 0.690 test_accuracy: 0.671\n",
            "[1,  1455] train_loss: 1.093 train_accuracy: 0.777 test_accuracy: 0.768\n",
            "[1,  1460] train_loss: 0.601 train_accuracy: 0.784 test_accuracy: 0.792\n",
            "[1,  1465] train_loss: 0.760 train_accuracy: 0.776 test_accuracy: 0.806\n",
            "[1,  1470] train_loss: 1.213 train_accuracy: 0.810 test_accuracy: 0.824\n",
            "[1,  1475] train_loss: 0.539 train_accuracy: 0.816 test_accuracy: 0.802\n",
            "[1,  1480] train_loss: 0.263 train_accuracy: 0.769 test_accuracy: 0.742\n",
            "[1,  1485] train_loss: 1.016 train_accuracy: 0.765 test_accuracy: 0.757\n",
            "[1,  1490] train_loss: 0.661 train_accuracy: 0.776 test_accuracy: 0.775\n",
            "[1,  1495] train_loss: 0.797 train_accuracy: 0.780 test_accuracy: 0.770\n",
            "[1,  1500] train_loss: 0.679 train_accuracy: 0.777 test_accuracy: 0.767\n",
            "[1,  1505] train_loss: 1.036 train_accuracy: 0.760 test_accuracy: 0.742\n",
            "[1,  1510] train_loss: 0.404 train_accuracy: 0.726 test_accuracy: 0.707\n",
            "[1,  1515] train_loss: 0.813 train_accuracy: 0.751 test_accuracy: 0.743\n",
            "[1,  1520] train_loss: 0.806 train_accuracy: 0.721 test_accuracy: 0.712\n",
            "[1,  1525] train_loss: 1.065 train_accuracy: 0.752 test_accuracy: 0.724\n",
            "[1,  1530] train_loss: 0.865 train_accuracy: 0.721 test_accuracy: 0.698\n",
            "[1,  1535] train_loss: 1.114 train_accuracy: 0.781 test_accuracy: 0.778\n",
            "[1,  1540] train_loss: 0.780 train_accuracy: 0.800 test_accuracy: 0.794\n",
            "[1,  1545] train_loss: 0.781 train_accuracy: 0.818 test_accuracy: 0.813\n",
            "[1,  1550] train_loss: 0.682 train_accuracy: 0.788 test_accuracy: 0.792\n",
            "[1,  1555] train_loss: 0.514 train_accuracy: 0.714 test_accuracy: 0.723\n",
            "[1,  1560] train_loss: 0.531 train_accuracy: 0.718 test_accuracy: 0.721\n",
            "[1,  1565] train_loss: 0.410 train_accuracy: 0.766 test_accuracy: 0.777\n",
            "[1,  1570] train_loss: 0.389 train_accuracy: 0.789 test_accuracy: 0.799\n",
            "[1,  1575] train_loss: 0.568 train_accuracy: 0.831 test_accuracy: 0.820\n",
            "[1,  1580] train_loss: 0.748 train_accuracy: 0.837 test_accuracy: 0.827\n",
            "[1,  1585] train_loss: 0.618 train_accuracy: 0.818 test_accuracy: 0.807\n",
            "[1,  1590] train_loss: 0.530 train_accuracy: 0.803 test_accuracy: 0.783\n",
            "[1,  1595] train_loss: 0.841 train_accuracy: 0.775 test_accuracy: 0.761\n",
            "[1,  1600] train_loss: 0.689 train_accuracy: 0.740 test_accuracy: 0.730\n",
            "[1,  1605] train_loss: 0.705 train_accuracy: 0.752 test_accuracy: 0.758\n",
            "[1,  1610] train_loss: 1.349 train_accuracy: 0.761 test_accuracy: 0.754\n",
            "[1,  1615] train_loss: 0.770 train_accuracy: 0.756 test_accuracy: 0.759\n",
            "[1,  1620] train_loss: 0.555 train_accuracy: 0.797 test_accuracy: 0.794\n",
            "[1,  1625] train_loss: 0.771 train_accuracy: 0.818 test_accuracy: 0.796\n",
            "[1,  1630] train_loss: 0.733 train_accuracy: 0.791 test_accuracy: 0.772\n",
            "[1,  1635] train_loss: 0.714 train_accuracy: 0.747 test_accuracy: 0.713\n",
            "[1,  1640] train_loss: 0.733 train_accuracy: 0.761 test_accuracy: 0.734\n",
            "[1,  1645] train_loss: 0.750 train_accuracy: 0.754 test_accuracy: 0.731\n",
            "[1,  1650] train_loss: 0.909 train_accuracy: 0.765 test_accuracy: 0.728\n",
            "[1,  1655] train_loss: 0.910 train_accuracy: 0.817 test_accuracy: 0.792\n",
            "[1,  1660] train_loss: 1.079 train_accuracy: 0.812 test_accuracy: 0.801\n",
            "[1,  1665] train_loss: 0.364 train_accuracy: 0.810 test_accuracy: 0.805\n",
            "[1,  1670] train_loss: 0.753 train_accuracy: 0.800 test_accuracy: 0.806\n",
            "[1,  1675] train_loss: 0.454 train_accuracy: 0.799 test_accuracy: 0.806\n",
            "[1,  1680] train_loss: 0.829 train_accuracy: 0.806 test_accuracy: 0.804\n",
            "[1,  1685] train_loss: 0.469 train_accuracy: 0.786 test_accuracy: 0.791\n",
            "[1,  1690] train_loss: 0.521 train_accuracy: 0.784 test_accuracy: 0.788\n",
            "[1,  1695] train_loss: 0.626 train_accuracy: 0.816 test_accuracy: 0.805\n",
            "[1,  1700] train_loss: 0.631 train_accuracy: 0.822 test_accuracy: 0.813\n",
            "[1,  1705] train_loss: 0.284 train_accuracy: 0.812 test_accuracy: 0.810\n",
            "[1,  1710] train_loss: 0.270 train_accuracy: 0.798 test_accuracy: 0.795\n",
            "[1,  1715] train_loss: 0.259 train_accuracy: 0.804 test_accuracy: 0.808\n",
            "[1,  1720] train_loss: 0.657 train_accuracy: 0.816 test_accuracy: 0.828\n",
            "[1,  1725] train_loss: 0.499 train_accuracy: 0.818 test_accuracy: 0.836\n",
            "[2,     5] train_loss: 0.493 train_accuracy: 0.780 test_accuracy: 0.809\n",
            "[2,    10] train_loss: 0.415 train_accuracy: 0.771 test_accuracy: 0.797\n",
            "[2,    15] train_loss: 1.039 train_accuracy: 0.718 test_accuracy: 0.743\n",
            "[2,    20] train_loss: 0.568 train_accuracy: 0.755 test_accuracy: 0.759\n",
            "[2,    25] train_loss: 0.652 train_accuracy: 0.803 test_accuracy: 0.805\n",
            "[2,    30] train_loss: 0.472 train_accuracy: 0.758 test_accuracy: 0.764\n",
            "[2,    35] train_loss: 0.823 train_accuracy: 0.776 test_accuracy: 0.773\n",
            "[2,    40] train_loss: 0.913 train_accuracy: 0.786 test_accuracy: 0.768\n",
            "[2,    45] train_loss: 0.467 train_accuracy: 0.759 test_accuracy: 0.744\n",
            "[2,    50] train_loss: 1.120 train_accuracy: 0.758 test_accuracy: 0.736\n",
            "[2,    55] train_loss: 0.845 train_accuracy: 0.783 test_accuracy: 0.763\n",
            "[2,    60] train_loss: 0.397 train_accuracy: 0.705 test_accuracy: 0.702\n",
            "[2,    65] train_loss: 0.873 train_accuracy: 0.719 test_accuracy: 0.708\n",
            "[2,    70] train_loss: 0.666 train_accuracy: 0.721 test_accuracy: 0.695\n",
            "[2,    75] train_loss: 0.608 train_accuracy: 0.727 test_accuracy: 0.693\n",
            "[2,    80] train_loss: 0.891 train_accuracy: 0.668 test_accuracy: 0.695\n",
            "[2,    85] train_loss: 1.094 train_accuracy: 0.778 test_accuracy: 0.813\n",
            "[2,    90] train_loss: 0.848 train_accuracy: 0.772 test_accuracy: 0.758\n",
            "[2,    95] train_loss: 0.636 train_accuracy: 0.707 test_accuracy: 0.683\n",
            "[2,   100] train_loss: 0.592 train_accuracy: 0.709 test_accuracy: 0.715\n",
            "[2,   105] train_loss: 0.890 train_accuracy: 0.722 test_accuracy: 0.741\n",
            "[2,   110] train_loss: 1.256 train_accuracy: 0.659 test_accuracy: 0.668\n",
            "[2,   115] train_loss: 1.233 train_accuracy: 0.780 test_accuracy: 0.773\n",
            "[2,   120] train_loss: 0.847 train_accuracy: 0.791 test_accuracy: 0.794\n",
            "[2,   125] train_loss: 0.460 train_accuracy: 0.759 test_accuracy: 0.775\n",
            "[2,   130] train_loss: 0.426 train_accuracy: 0.741 test_accuracy: 0.756\n",
            "[2,   135] train_loss: 0.790 train_accuracy: 0.739 test_accuracy: 0.712\n",
            "[2,   140] train_loss: 1.152 train_accuracy: 0.700 test_accuracy: 0.670\n",
            "[2,   145] train_loss: 0.786 train_accuracy: 0.713 test_accuracy: 0.683\n",
            "[2,   150] train_loss: 0.848 train_accuracy: 0.739 test_accuracy: 0.730\n",
            "[2,   155] train_loss: 0.750 train_accuracy: 0.778 test_accuracy: 0.788\n",
            "[2,   160] train_loss: 0.352 train_accuracy: 0.741 test_accuracy: 0.751\n",
            "[2,   165] train_loss: 0.960 train_accuracy: 0.776 test_accuracy: 0.772\n",
            "[2,   170] train_loss: 0.812 train_accuracy: 0.711 test_accuracy: 0.713\n",
            "[2,   175] train_loss: 1.061 train_accuracy: 0.682 test_accuracy: 0.653\n",
            "[2,   180] train_loss: 0.886 train_accuracy: 0.762 test_accuracy: 0.736\n",
            "[2,   185] train_loss: 0.583 train_accuracy: 0.743 test_accuracy: 0.715\n",
            "[2,   190] train_loss: 0.890 train_accuracy: 0.762 test_accuracy: 0.740\n",
            "[2,   195] train_loss: 0.639 train_accuracy: 0.746 test_accuracy: 0.731\n",
            "[2,   200] train_loss: 0.602 train_accuracy: 0.753 test_accuracy: 0.779\n",
            "[2,   205] train_loss: 0.259 train_accuracy: 0.765 test_accuracy: 0.803\n",
            "[2,   210] train_loss: 0.793 train_accuracy: 0.776 test_accuracy: 0.793\n",
            "[2,   215] train_loss: 0.860 train_accuracy: 0.758 test_accuracy: 0.739\n",
            "[2,   220] train_loss: 0.465 train_accuracy: 0.721 test_accuracy: 0.682\n",
            "[2,   225] train_loss: 0.942 train_accuracy: 0.698 test_accuracy: 0.649\n",
            "[2,   230] train_loss: 0.838 train_accuracy: 0.628 test_accuracy: 0.581\n",
            "[2,   235] train_loss: 0.866 train_accuracy: 0.687 test_accuracy: 0.655\n",
            "[2,   240] train_loss: 0.666 train_accuracy: 0.743 test_accuracy: 0.734\n",
            "[2,   245] train_loss: 0.638 train_accuracy: 0.734 test_accuracy: 0.729\n",
            "[2,   250] train_loss: 0.666 train_accuracy: 0.731 test_accuracy: 0.722\n",
            "[2,   255] train_loss: 0.916 train_accuracy: 0.739 test_accuracy: 0.721\n",
            "[2,   260] train_loss: 0.404 train_accuracy: 0.690 test_accuracy: 0.668\n",
            "[2,   265] train_loss: 0.824 train_accuracy: 0.723 test_accuracy: 0.702\n",
            "[2,   270] train_loss: 0.809 train_accuracy: 0.749 test_accuracy: 0.729\n",
            "[2,   275] train_loss: 0.942 train_accuracy: 0.763 test_accuracy: 0.756\n",
            "[2,   280] train_loss: 0.526 train_accuracy: 0.795 test_accuracy: 0.784\n",
            "[2,   285] train_loss: 0.505 train_accuracy: 0.800 test_accuracy: 0.784\n",
            "[2,   290] train_loss: 0.852 train_accuracy: 0.792 test_accuracy: 0.774\n",
            "[2,   295] train_loss: 0.681 train_accuracy: 0.801 test_accuracy: 0.770\n",
            "[2,   300] train_loss: 1.050 train_accuracy: 0.793 test_accuracy: 0.773\n",
            "[2,   305] train_loss: 0.598 train_accuracy: 0.746 test_accuracy: 0.759\n",
            "[2,   310] train_loss: 0.352 train_accuracy: 0.734 test_accuracy: 0.763\n",
            "[2,   315] train_loss: 0.928 train_accuracy: 0.764 test_accuracy: 0.793\n",
            "[2,   320] train_loss: 0.334 train_accuracy: 0.766 test_accuracy: 0.801\n",
            "[2,   325] train_loss: 1.188 train_accuracy: 0.761 test_accuracy: 0.801\n",
            "[2,   330] train_loss: 0.243 train_accuracy: 0.721 test_accuracy: 0.741\n",
            "[2,   335] train_loss: 0.836 train_accuracy: 0.701 test_accuracy: 0.701\n",
            "[2,   340] train_loss: 1.084 train_accuracy: 0.752 test_accuracy: 0.743\n",
            "[2,   345] train_loss: 0.581 train_accuracy: 0.788 test_accuracy: 0.777\n",
            "[2,   350] train_loss: 0.523 train_accuracy: 0.772 test_accuracy: 0.773\n",
            "[2,   355] train_loss: 0.907 train_accuracy: 0.772 test_accuracy: 0.757\n",
            "[2,   360] train_loss: 0.530 train_accuracy: 0.767 test_accuracy: 0.752\n",
            "[2,   365] train_loss: 0.547 train_accuracy: 0.769 test_accuracy: 0.745\n",
            "[2,   370] train_loss: 1.100 train_accuracy: 0.702 test_accuracy: 0.681\n",
            "[2,   375] train_loss: 0.854 train_accuracy: 0.786 test_accuracy: 0.778\n",
            "[2,   380] train_loss: 0.667 train_accuracy: 0.831 test_accuracy: 0.839\n",
            "[2,   385] train_loss: 0.329 train_accuracy: 0.811 test_accuracy: 0.834\n",
            "[2,   390] train_loss: 1.076 train_accuracy: 0.812 test_accuracy: 0.825\n",
            "[2,   395] train_loss: 0.622 train_accuracy: 0.812 test_accuracy: 0.809\n",
            "[2,   400] train_loss: 0.695 train_accuracy: 0.816 test_accuracy: 0.794\n",
            "[2,   405] train_loss: 0.873 train_accuracy: 0.803 test_accuracy: 0.781\n",
            "[2,   410] train_loss: 0.710 train_accuracy: 0.804 test_accuracy: 0.793\n",
            "[2,   415] train_loss: 0.450 train_accuracy: 0.761 test_accuracy: 0.764\n",
            "[2,   420] train_loss: 0.914 train_accuracy: 0.811 test_accuracy: 0.801\n",
            "[2,   425] train_loss: 0.772 train_accuracy: 0.821 test_accuracy: 0.822\n",
            "[2,   430] train_loss: 0.279 train_accuracy: 0.815 test_accuracy: 0.807\n",
            "[2,   435] train_loss: 0.331 train_accuracy: 0.816 test_accuracy: 0.804\n",
            "[2,   440] train_loss: 0.398 train_accuracy: 0.801 test_accuracy: 0.801\n",
            "[2,   445] train_loss: 0.691 train_accuracy: 0.813 test_accuracy: 0.809\n",
            "[2,   450] train_loss: 0.937 train_accuracy: 0.822 test_accuracy: 0.817\n",
            "[2,   455] train_loss: 0.857 train_accuracy: 0.816 test_accuracy: 0.807\n",
            "[2,   460] train_loss: 0.577 train_accuracy: 0.786 test_accuracy: 0.766\n",
            "[2,   465] train_loss: 0.306 train_accuracy: 0.782 test_accuracy: 0.753\n",
            "[2,   470] train_loss: 0.355 train_accuracy: 0.762 test_accuracy: 0.736\n",
            "[2,   475] train_loss: 1.013 train_accuracy: 0.797 test_accuracy: 0.764\n",
            "[2,   480] train_loss: 0.610 train_accuracy: 0.760 test_accuracy: 0.738\n",
            "[2,   485] train_loss: 1.267 train_accuracy: 0.765 test_accuracy: 0.771\n",
            "[2,   490] train_loss: 0.633 train_accuracy: 0.774 test_accuracy: 0.782\n",
            "[2,   495] train_loss: 0.572 train_accuracy: 0.795 test_accuracy: 0.785\n",
            "[2,   500] train_loss: 0.517 train_accuracy: 0.809 test_accuracy: 0.791\n",
            "[2,   505] train_loss: 0.315 train_accuracy: 0.811 test_accuracy: 0.792\n",
            "[2,   510] train_loss: 1.055 train_accuracy: 0.822 test_accuracy: 0.802\n",
            "[2,   515] train_loss: 0.573 train_accuracy: 0.794 test_accuracy: 0.785\n",
            "[2,   520] train_loss: 0.542 train_accuracy: 0.799 test_accuracy: 0.800\n",
            "[2,   525] train_loss: 0.395 train_accuracy: 0.811 test_accuracy: 0.805\n",
            "[2,   530] train_loss: 0.534 train_accuracy: 0.807 test_accuracy: 0.796\n",
            "[2,   535] train_loss: 0.815 train_accuracy: 0.828 test_accuracy: 0.812\n",
            "[2,   540] train_loss: 0.749 train_accuracy: 0.841 test_accuracy: 0.827\n",
            "[2,   545] train_loss: 0.568 train_accuracy: 0.847 test_accuracy: 0.837\n",
            "[2,   550] train_loss: 0.722 train_accuracy: 0.858 test_accuracy: 0.848\n",
            "[2,   555] train_loss: 0.641 train_accuracy: 0.853 test_accuracy: 0.839\n",
            "[2,   560] train_loss: 0.682 train_accuracy: 0.828 test_accuracy: 0.823\n",
            "[2,   565] train_loss: 0.558 train_accuracy: 0.819 test_accuracy: 0.808\n",
            "[2,   570] train_loss: 0.430 train_accuracy: 0.783 test_accuracy: 0.778\n",
            "[2,   575] train_loss: 0.510 train_accuracy: 0.799 test_accuracy: 0.792\n",
            "[2,   580] train_loss: 0.321 train_accuracy: 0.842 test_accuracy: 0.826\n",
            "[2,   585] train_loss: 0.434 train_accuracy: 0.830 test_accuracy: 0.808\n",
            "[2,   590] train_loss: 0.306 train_accuracy: 0.814 test_accuracy: 0.791\n",
            "[2,   595] train_loss: 0.805 train_accuracy: 0.809 test_accuracy: 0.782\n",
            "[2,   600] train_loss: 0.459 train_accuracy: 0.814 test_accuracy: 0.806\n",
            "[2,   605] train_loss: 0.902 train_accuracy: 0.831 test_accuracy: 0.848\n",
            "[2,   610] train_loss: 1.034 train_accuracy: 0.811 test_accuracy: 0.821\n",
            "[2,   615] train_loss: 0.481 train_accuracy: 0.810 test_accuracy: 0.809\n",
            "[2,   620] train_loss: 0.567 train_accuracy: 0.803 test_accuracy: 0.796\n",
            "[2,   625] train_loss: 0.605 train_accuracy: 0.821 test_accuracy: 0.809\n",
            "[2,   630] train_loss: 0.413 train_accuracy: 0.803 test_accuracy: 0.790\n",
            "[2,   635] train_loss: 0.584 train_accuracy: 0.788 test_accuracy: 0.769\n",
            "[2,   640] train_loss: 0.280 train_accuracy: 0.795 test_accuracy: 0.760\n",
            "[2,   645] train_loss: 0.791 train_accuracy: 0.822 test_accuracy: 0.793\n",
            "[2,   650] train_loss: 0.414 train_accuracy: 0.784 test_accuracy: 0.784\n",
            "[2,   655] train_loss: 0.498 train_accuracy: 0.771 test_accuracy: 0.791\n",
            "[2,   660] train_loss: 0.848 train_accuracy: 0.777 test_accuracy: 0.799\n",
            "[2,   665] train_loss: 0.873 train_accuracy: 0.798 test_accuracy: 0.834\n",
            "[2,   670] train_loss: 0.293 train_accuracy: 0.805 test_accuracy: 0.834\n",
            "[2,   675] train_loss: 0.347 train_accuracy: 0.821 test_accuracy: 0.833\n",
            "[2,   680] train_loss: 0.470 train_accuracy: 0.841 test_accuracy: 0.850\n",
            "[2,   685] train_loss: 0.154 train_accuracy: 0.845 test_accuracy: 0.843\n",
            "[2,   690] train_loss: 0.331 train_accuracy: 0.858 test_accuracy: 0.843\n",
            "[2,   695] train_loss: 0.879 train_accuracy: 0.852 test_accuracy: 0.832\n",
            "[2,   700] train_loss: 0.386 train_accuracy: 0.840 test_accuracy: 0.812\n",
            "[2,   705] train_loss: 0.681 train_accuracy: 0.828 test_accuracy: 0.806\n",
            "[2,   710] train_loss: 0.715 train_accuracy: 0.840 test_accuracy: 0.807\n",
            "[2,   715] train_loss: 0.367 train_accuracy: 0.819 test_accuracy: 0.792\n",
            "[2,   720] train_loss: 0.504 train_accuracy: 0.812 test_accuracy: 0.796\n",
            "[2,   725] train_loss: 0.639 train_accuracy: 0.798 test_accuracy: 0.798\n",
            "[2,   730] train_loss: 0.747 train_accuracy: 0.812 test_accuracy: 0.813\n",
            "[2,   735] train_loss: 0.504 train_accuracy: 0.787 test_accuracy: 0.797\n",
            "[2,   740] train_loss: 0.839 train_accuracy: 0.750 test_accuracy: 0.769\n",
            "[2,   745] train_loss: 0.575 train_accuracy: 0.763 test_accuracy: 0.772\n",
            "[2,   750] train_loss: 0.376 train_accuracy: 0.772 test_accuracy: 0.777\n",
            "[2,   755] train_loss: 0.524 train_accuracy: 0.800 test_accuracy: 0.811\n",
            "[2,   760] train_loss: 0.645 train_accuracy: 0.808 test_accuracy: 0.835\n",
            "[2,   765] train_loss: 0.476 train_accuracy: 0.826 test_accuracy: 0.845\n",
            "[2,   770] train_loss: 0.216 train_accuracy: 0.830 test_accuracy: 0.842\n",
            "[2,   775] train_loss: 0.907 train_accuracy: 0.803 test_accuracy: 0.799\n",
            "[2,   780] train_loss: 1.271 train_accuracy: 0.821 test_accuracy: 0.804\n",
            "[2,   785] train_loss: 0.785 train_accuracy: 0.772 test_accuracy: 0.767\n",
            "[2,   790] train_loss: 0.666 train_accuracy: 0.791 test_accuracy: 0.776\n",
            "[2,   795] train_loss: 0.637 train_accuracy: 0.809 test_accuracy: 0.812\n",
            "[2,   800] train_loss: 1.084 train_accuracy: 0.809 test_accuracy: 0.811\n",
            "[2,   805] train_loss: 0.376 train_accuracy: 0.799 test_accuracy: 0.783\n",
            "[2,   810] train_loss: 0.581 train_accuracy: 0.800 test_accuracy: 0.789\n",
            "[2,   815] train_loss: 0.653 train_accuracy: 0.828 test_accuracy: 0.824\n",
            "[2,   820] train_loss: 0.595 train_accuracy: 0.837 test_accuracy: 0.839\n",
            "[2,   825] train_loss: 0.249 train_accuracy: 0.824 test_accuracy: 0.823\n",
            "[2,   830] train_loss: 0.400 train_accuracy: 0.812 test_accuracy: 0.813\n",
            "[2,   835] train_loss: 0.143 train_accuracy: 0.790 test_accuracy: 0.786\n",
            "[2,   840] train_loss: 0.420 train_accuracy: 0.798 test_accuracy: 0.790\n",
            "[2,   845] train_loss: 0.632 train_accuracy: 0.795 test_accuracy: 0.771\n",
            "[2,   850] train_loss: 1.148 train_accuracy: 0.798 test_accuracy: 0.762\n",
            "[2,   855] train_loss: 0.760 train_accuracy: 0.808 test_accuracy: 0.778\n",
            "[2,   860] train_loss: 0.554 train_accuracy: 0.817 test_accuracy: 0.797\n",
            "[2,   865] train_loss: 0.479 train_accuracy: 0.811 test_accuracy: 0.812\n",
            "[2,   870] train_loss: 0.429 train_accuracy: 0.796 test_accuracy: 0.806\n",
            "[2,   875] train_loss: 0.271 train_accuracy: 0.778 test_accuracy: 0.792\n",
            "[2,   880] train_loss: 0.740 train_accuracy: 0.797 test_accuracy: 0.804\n",
            "[2,   885] train_loss: 0.785 train_accuracy: 0.806 test_accuracy: 0.799\n",
            "[2,   890] train_loss: 0.857 train_accuracy: 0.837 test_accuracy: 0.822\n",
            "[2,   895] train_loss: 0.466 train_accuracy: 0.837 test_accuracy: 0.821\n",
            "[2,   900] train_loss: 0.194 train_accuracy: 0.801 test_accuracy: 0.791\n",
            "[2,   905] train_loss: 0.819 train_accuracy: 0.828 test_accuracy: 0.815\n",
            "[2,   910] train_loss: 0.445 train_accuracy: 0.812 test_accuracy: 0.797\n",
            "[2,   915] train_loss: 0.793 train_accuracy: 0.804 test_accuracy: 0.806\n",
            "[2,   920] train_loss: 0.832 train_accuracy: 0.797 test_accuracy: 0.806\n",
            "[2,   925] train_loss: 0.413 train_accuracy: 0.809 test_accuracy: 0.809\n",
            "[2,   930] train_loss: 0.803 train_accuracy: 0.845 test_accuracy: 0.848\n",
            "[2,   935] train_loss: 0.334 train_accuracy: 0.843 test_accuracy: 0.852\n",
            "[2,   940] train_loss: 0.580 train_accuracy: 0.849 test_accuracy: 0.845\n",
            "[2,   945] train_loss: 0.671 train_accuracy: 0.842 test_accuracy: 0.843\n",
            "[2,   950] train_loss: 0.616 train_accuracy: 0.835 test_accuracy: 0.836\n",
            "[2,   955] train_loss: 0.854 train_accuracy: 0.834 test_accuracy: 0.827\n",
            "[2,   960] train_loss: 0.557 train_accuracy: 0.806 test_accuracy: 0.801\n",
            "[2,   965] train_loss: 0.463 train_accuracy: 0.788 test_accuracy: 0.765\n",
            "[2,   970] train_loss: 0.594 train_accuracy: 0.806 test_accuracy: 0.778\n",
            "[2,   975] train_loss: 0.601 train_accuracy: 0.793 test_accuracy: 0.771\n",
            "[2,   980] train_loss: 0.812 train_accuracy: 0.798 test_accuracy: 0.791\n",
            "[2,   985] train_loss: 0.417 train_accuracy: 0.828 test_accuracy: 0.808\n",
            "[2,   990] train_loss: 0.675 train_accuracy: 0.836 test_accuracy: 0.815\n",
            "[2,   995] train_loss: 0.491 train_accuracy: 0.820 test_accuracy: 0.795\n",
            "[2,  1000] train_loss: 0.932 train_accuracy: 0.828 test_accuracy: 0.800\n",
            "[2,  1005] train_loss: 0.360 train_accuracy: 0.827 test_accuracy: 0.801\n",
            "[2,  1010] train_loss: 0.272 train_accuracy: 0.815 test_accuracy: 0.786\n",
            "[2,  1015] train_loss: 0.193 train_accuracy: 0.797 test_accuracy: 0.773\n",
            "[2,  1020] train_loss: 0.426 train_accuracy: 0.794 test_accuracy: 0.765\n",
            "[2,  1025] train_loss: 0.341 train_accuracy: 0.805 test_accuracy: 0.777\n",
            "[2,  1030] train_loss: 0.833 train_accuracy: 0.827 test_accuracy: 0.804\n",
            "[2,  1035] train_loss: 1.513 train_accuracy: 0.837 test_accuracy: 0.830\n",
            "[2,  1040] train_loss: 0.588 train_accuracy: 0.799 test_accuracy: 0.790\n",
            "[2,  1045] train_loss: 0.624 train_accuracy: 0.815 test_accuracy: 0.787\n",
            "[2,  1050] train_loss: 0.953 train_accuracy: 0.799 test_accuracy: 0.763\n",
            "[2,  1055] train_loss: 0.727 train_accuracy: 0.800 test_accuracy: 0.773\n",
            "[2,  1060] train_loss: 0.346 train_accuracy: 0.815 test_accuracy: 0.793\n",
            "[2,  1065] train_loss: 0.406 train_accuracy: 0.830 test_accuracy: 0.815\n",
            "[2,  1070] train_loss: 0.543 train_accuracy: 0.799 test_accuracy: 0.800\n",
            "[2,  1075] train_loss: 0.535 train_accuracy: 0.777 test_accuracy: 0.765\n",
            "[2,  1080] train_loss: 0.646 train_accuracy: 0.785 test_accuracy: 0.775\n",
            "[2,  1085] train_loss: 0.686 train_accuracy: 0.806 test_accuracy: 0.801\n",
            "[2,  1090] train_loss: 0.390 train_accuracy: 0.832 test_accuracy: 0.825\n",
            "[2,  1095] train_loss: 0.288 train_accuracy: 0.838 test_accuracy: 0.827\n",
            "[2,  1100] train_loss: 0.944 train_accuracy: 0.814 test_accuracy: 0.801\n",
            "[2,  1105] train_loss: 0.477 train_accuracy: 0.786 test_accuracy: 0.768\n",
            "[2,  1110] train_loss: 0.361 train_accuracy: 0.803 test_accuracy: 0.785\n",
            "[2,  1115] train_loss: 0.305 train_accuracy: 0.789 test_accuracy: 0.767\n",
            "[2,  1120] train_loss: 1.266 train_accuracy: 0.799 test_accuracy: 0.773\n",
            "[2,  1125] train_loss: 0.834 train_accuracy: 0.820 test_accuracy: 0.798\n",
            "[2,  1130] train_loss: 0.700 train_accuracy: 0.838 test_accuracy: 0.817\n",
            "[2,  1135] train_loss: 0.534 train_accuracy: 0.815 test_accuracy: 0.798\n",
            "[2,  1140] train_loss: 0.656 train_accuracy: 0.817 test_accuracy: 0.805\n",
            "[2,  1145] train_loss: 0.594 train_accuracy: 0.824 test_accuracy: 0.823\n",
            "[2,  1150] train_loss: 0.463 train_accuracy: 0.842 test_accuracy: 0.856\n",
            "[2,  1155] train_loss: 0.485 train_accuracy: 0.851 test_accuracy: 0.863\n",
            "[2,  1160] train_loss: 0.854 train_accuracy: 0.842 test_accuracy: 0.849\n",
            "[2,  1165] train_loss: 0.382 train_accuracy: 0.828 test_accuracy: 0.820\n",
            "[2,  1170] train_loss: 0.477 train_accuracy: 0.821 test_accuracy: 0.819\n",
            "[2,  1175] train_loss: 0.454 train_accuracy: 0.826 test_accuracy: 0.807\n",
            "[2,  1180] train_loss: 0.403 train_accuracy: 0.805 test_accuracy: 0.775\n",
            "[2,  1185] train_loss: 0.405 train_accuracy: 0.768 test_accuracy: 0.735\n",
            "[2,  1190] train_loss: 0.417 train_accuracy: 0.802 test_accuracy: 0.773\n",
            "[2,  1195] train_loss: 0.546 train_accuracy: 0.797 test_accuracy: 0.779\n",
            "[2,  1200] train_loss: 0.634 train_accuracy: 0.822 test_accuracy: 0.818\n",
            "[2,  1205] train_loss: 0.567 train_accuracy: 0.793 test_accuracy: 0.785\n",
            "[2,  1210] train_loss: 1.003 train_accuracy: 0.740 test_accuracy: 0.743\n",
            "[2,  1215] train_loss: 1.004 train_accuracy: 0.772 test_accuracy: 0.764\n",
            "[2,  1220] train_loss: 0.546 train_accuracy: 0.795 test_accuracy: 0.758\n",
            "[2,  1225] train_loss: 0.419 train_accuracy: 0.803 test_accuracy: 0.763\n",
            "[2,  1230] train_loss: 1.076 train_accuracy: 0.821 test_accuracy: 0.781\n",
            "[2,  1235] train_loss: 0.367 train_accuracy: 0.821 test_accuracy: 0.789\n",
            "[2,  1240] train_loss: 0.566 train_accuracy: 0.847 test_accuracy: 0.834\n",
            "[2,  1245] train_loss: 0.560 train_accuracy: 0.837 test_accuracy: 0.846\n",
            "[2,  1250] train_loss: 0.350 train_accuracy: 0.781 test_accuracy: 0.803\n",
            "[2,  1255] train_loss: 0.857 train_accuracy: 0.760 test_accuracy: 0.778\n",
            "[2,  1260] train_loss: 0.334 train_accuracy: 0.748 test_accuracy: 0.775\n",
            "[2,  1265] train_loss: 1.024 train_accuracy: 0.753 test_accuracy: 0.778\n",
            "[2,  1270] train_loss: 0.726 train_accuracy: 0.750 test_accuracy: 0.772\n",
            "[2,  1275] train_loss: 0.598 train_accuracy: 0.754 test_accuracy: 0.773\n",
            "[2,  1280] train_loss: 0.961 train_accuracy: 0.789 test_accuracy: 0.791\n",
            "[2,  1285] train_loss: 0.421 train_accuracy: 0.829 test_accuracy: 0.822\n",
            "[2,  1290] train_loss: 0.582 train_accuracy: 0.854 test_accuracy: 0.845\n",
            "[2,  1295] train_loss: 0.635 train_accuracy: 0.840 test_accuracy: 0.842\n",
            "[2,  1300] train_loss: 0.875 train_accuracy: 0.811 test_accuracy: 0.810\n",
            "[2,  1305] train_loss: 0.591 train_accuracy: 0.846 test_accuracy: 0.834\n",
            "[2,  1310] train_loss: 0.384 train_accuracy: 0.845 test_accuracy: 0.836\n",
            "[2,  1315] train_loss: 0.307 train_accuracy: 0.845 test_accuracy: 0.830\n",
            "[2,  1320] train_loss: 0.545 train_accuracy: 0.845 test_accuracy: 0.825\n",
            "[2,  1325] train_loss: 0.785 train_accuracy: 0.834 test_accuracy: 0.814\n",
            "[2,  1330] train_loss: 0.407 train_accuracy: 0.814 test_accuracy: 0.794\n",
            "[2,  1335] train_loss: 1.112 train_accuracy: 0.814 test_accuracy: 0.783\n",
            "[2,  1340] train_loss: 0.728 train_accuracy: 0.806 test_accuracy: 0.774\n",
            "[2,  1345] train_loss: 0.464 train_accuracy: 0.790 test_accuracy: 0.763\n",
            "[2,  1350] train_loss: 0.539 train_accuracy: 0.804 test_accuracy: 0.782\n",
            "[2,  1355] train_loss: 0.549 train_accuracy: 0.813 test_accuracy: 0.794\n",
            "[2,  1360] train_loss: 0.536 train_accuracy: 0.804 test_accuracy: 0.796\n",
            "[2,  1365] train_loss: 0.881 train_accuracy: 0.801 test_accuracy: 0.785\n",
            "[2,  1370] train_loss: 1.291 train_accuracy: 0.835 test_accuracy: 0.815\n",
            "[2,  1375] train_loss: 0.157 train_accuracy: 0.829 test_accuracy: 0.819\n",
            "[2,  1380] train_loss: 0.623 train_accuracy: 0.818 test_accuracy: 0.824\n",
            "[2,  1385] train_loss: 0.511 train_accuracy: 0.811 test_accuracy: 0.825\n",
            "[2,  1390] train_loss: 0.538 train_accuracy: 0.827 test_accuracy: 0.821\n",
            "[2,  1395] train_loss: 0.615 train_accuracy: 0.826 test_accuracy: 0.819\n",
            "[2,  1400] train_loss: 0.659 train_accuracy: 0.817 test_accuracy: 0.812\n",
            "[2,  1405] train_loss: 0.350 train_accuracy: 0.805 test_accuracy: 0.800\n",
            "[2,  1410] train_loss: 0.843 train_accuracy: 0.851 test_accuracy: 0.839\n",
            "[2,  1415] train_loss: 0.330 train_accuracy: 0.844 test_accuracy: 0.832\n",
            "[2,  1420] train_loss: 0.504 train_accuracy: 0.845 test_accuracy: 0.835\n",
            "[2,  1425] train_loss: 0.426 train_accuracy: 0.846 test_accuracy: 0.831\n",
            "[2,  1430] train_loss: 0.440 train_accuracy: 0.858 test_accuracy: 0.845\n",
            "[2,  1435] train_loss: 0.494 train_accuracy: 0.860 test_accuracy: 0.840\n",
            "[2,  1440] train_loss: 1.074 train_accuracy: 0.847 test_accuracy: 0.831\n",
            "[2,  1445] train_loss: 0.508 train_accuracy: 0.826 test_accuracy: 0.814\n",
            "[2,  1450] train_loss: 0.579 train_accuracy: 0.829 test_accuracy: 0.810\n",
            "[2,  1455] train_loss: 0.884 train_accuracy: 0.837 test_accuracy: 0.815\n",
            "[2,  1460] train_loss: 0.393 train_accuracy: 0.809 test_accuracy: 0.798\n",
            "[2,  1465] train_loss: 0.702 train_accuracy: 0.804 test_accuracy: 0.794\n",
            "[2,  1470] train_loss: 0.722 train_accuracy: 0.829 test_accuracy: 0.825\n",
            "[2,  1475] train_loss: 0.659 train_accuracy: 0.819 test_accuracy: 0.805\n",
            "[2,  1480] train_loss: 0.447 train_accuracy: 0.831 test_accuracy: 0.813\n",
            "[2,  1485] train_loss: 0.637 train_accuracy: 0.816 test_accuracy: 0.796\n",
            "[2,  1490] train_loss: 0.502 train_accuracy: 0.769 test_accuracy: 0.743\n",
            "[2,  1495] train_loss: 0.803 train_accuracy: 0.773 test_accuracy: 0.753\n",
            "[2,  1500] train_loss: 0.753 train_accuracy: 0.833 test_accuracy: 0.811\n",
            "[2,  1505] train_loss: 0.850 train_accuracy: 0.809 test_accuracy: 0.787\n",
            "[2,  1510] train_loss: 0.514 train_accuracy: 0.813 test_accuracy: 0.795\n",
            "[2,  1515] train_loss: 0.474 train_accuracy: 0.802 test_accuracy: 0.824\n",
            "[2,  1520] train_loss: 0.836 train_accuracy: 0.792 test_accuracy: 0.805\n",
            "[2,  1525] train_loss: 0.824 train_accuracy: 0.833 test_accuracy: 0.801\n",
            "[2,  1530] train_loss: 0.377 train_accuracy: 0.780 test_accuracy: 0.748\n",
            "[2,  1535] train_loss: 0.678 train_accuracy: 0.771 test_accuracy: 0.733\n",
            "[2,  1540] train_loss: 0.462 train_accuracy: 0.817 test_accuracy: 0.785\n",
            "[2,  1545] train_loss: 0.349 train_accuracy: 0.828 test_accuracy: 0.802\n",
            "[2,  1550] train_loss: 0.645 train_accuracy: 0.838 test_accuracy: 0.822\n",
            "[2,  1555] train_loss: 0.321 train_accuracy: 0.844 test_accuracy: 0.822\n",
            "[2,  1560] train_loss: 0.761 train_accuracy: 0.845 test_accuracy: 0.830\n",
            "[2,  1565] train_loss: 0.412 train_accuracy: 0.831 test_accuracy: 0.827\n",
            "[2,  1570] train_loss: 0.592 train_accuracy: 0.795 test_accuracy: 0.800\n",
            "[2,  1575] train_loss: 0.609 train_accuracy: 0.792 test_accuracy: 0.814\n",
            "[2,  1580] train_loss: 0.677 train_accuracy: 0.837 test_accuracy: 0.852\n",
            "[2,  1585] train_loss: 0.811 train_accuracy: 0.849 test_accuracy: 0.851\n",
            "[2,  1590] train_loss: 0.388 train_accuracy: 0.804 test_accuracy: 0.790\n",
            "[2,  1595] train_loss: 0.407 train_accuracy: 0.794 test_accuracy: 0.777\n",
            "[2,  1600] train_loss: 0.412 train_accuracy: 0.842 test_accuracy: 0.827\n",
            "[2,  1605] train_loss: 0.528 train_accuracy: 0.843 test_accuracy: 0.834\n",
            "[2,  1610] train_loss: 0.613 train_accuracy: 0.842 test_accuracy: 0.839\n",
            "[2,  1615] train_loss: 0.577 train_accuracy: 0.843 test_accuracy: 0.834\n",
            "[2,  1620] train_loss: 0.592 train_accuracy: 0.866 test_accuracy: 0.853\n",
            "[2,  1625] train_loss: 0.396 train_accuracy: 0.864 test_accuracy: 0.840\n",
            "[2,  1630] train_loss: 0.801 train_accuracy: 0.815 test_accuracy: 0.801\n",
            "[2,  1635] train_loss: 1.237 train_accuracy: 0.768 test_accuracy: 0.770\n",
            "[2,  1640] train_loss: 0.724 train_accuracy: 0.772 test_accuracy: 0.770\n",
            "[2,  1645] train_loss: 0.829 train_accuracy: 0.812 test_accuracy: 0.784\n",
            "[2,  1650] train_loss: 0.688 train_accuracy: 0.822 test_accuracy: 0.795\n",
            "[2,  1655] train_loss: 0.579 train_accuracy: 0.855 test_accuracy: 0.834\n",
            "[2,  1660] train_loss: 0.290 train_accuracy: 0.812 test_accuracy: 0.803\n",
            "[2,  1665] train_loss: 0.628 train_accuracy: 0.789 test_accuracy: 0.796\n",
            "[2,  1670] train_loss: 0.569 train_accuracy: 0.797 test_accuracy: 0.811\n",
            "[2,  1675] train_loss: 0.640 train_accuracy: 0.800 test_accuracy: 0.804\n",
            "[2,  1680] train_loss: 0.556 train_accuracy: 0.795 test_accuracy: 0.781\n",
            "[2,  1685] train_loss: 1.582 train_accuracy: 0.820 test_accuracy: 0.789\n",
            "[2,  1690] train_loss: 0.833 train_accuracy: 0.756 test_accuracy: 0.732\n",
            "[2,  1695] train_loss: 1.080 train_accuracy: 0.738 test_accuracy: 0.711\n",
            "[2,  1700] train_loss: 0.958 train_accuracy: 0.788 test_accuracy: 0.752\n",
            "[2,  1705] train_loss: 0.520 train_accuracy: 0.789 test_accuracy: 0.751\n",
            "[2,  1710] train_loss: 1.090 train_accuracy: 0.836 test_accuracy: 0.822\n",
            "[2,  1715] train_loss: 0.542 train_accuracy: 0.825 test_accuracy: 0.852\n",
            "[2,  1720] train_loss: 0.446 train_accuracy: 0.800 test_accuracy: 0.849\n",
            "[2,  1725] train_loss: 0.596 train_accuracy: 0.807 test_accuracy: 0.845\n",
            "[3,     5] train_loss: 0.685 train_accuracy: 0.832 test_accuracy: 0.854\n",
            "[3,    10] train_loss: 0.863 train_accuracy: 0.839 test_accuracy: 0.850\n",
            "[3,    15] train_loss: 0.649 train_accuracy: 0.837 test_accuracy: 0.834\n",
            "[3,    20] train_loss: 0.828 train_accuracy: 0.821 test_accuracy: 0.808\n",
            "[3,    25] train_loss: 0.425 train_accuracy: 0.821 test_accuracy: 0.818\n",
            "[3,    30] train_loss: 0.248 train_accuracy: 0.828 test_accuracy: 0.825\n",
            "[3,    35] train_loss: 0.743 train_accuracy: 0.818 test_accuracy: 0.823\n",
            "[3,    40] train_loss: 0.586 train_accuracy: 0.840 test_accuracy: 0.833\n",
            "[3,    45] train_loss: 0.254 train_accuracy: 0.810 test_accuracy: 0.803\n",
            "[3,    50] train_loss: 0.946 train_accuracy: 0.827 test_accuracy: 0.810\n",
            "[3,    55] train_loss: 0.565 train_accuracy: 0.838 test_accuracy: 0.820\n",
            "[3,    60] train_loss: 0.291 train_accuracy: 0.845 test_accuracy: 0.833\n",
            "[3,    65] train_loss: 0.774 train_accuracy: 0.812 test_accuracy: 0.804\n",
            "[3,    70] train_loss: 0.333 train_accuracy: 0.788 test_accuracy: 0.777\n",
            "[3,    75] train_loss: 0.728 train_accuracy: 0.832 test_accuracy: 0.820\n",
            "[3,    80] train_loss: 0.494 train_accuracy: 0.845 test_accuracy: 0.831\n",
            "[3,    85] train_loss: 0.676 train_accuracy: 0.838 test_accuracy: 0.826\n",
            "[3,    90] train_loss: 0.283 train_accuracy: 0.818 test_accuracy: 0.817\n",
            "[3,    95] train_loss: 0.282 train_accuracy: 0.818 test_accuracy: 0.814\n",
            "[3,   100] train_loss: 0.477 train_accuracy: 0.852 test_accuracy: 0.840\n",
            "[3,   105] train_loss: 0.376 train_accuracy: 0.827 test_accuracy: 0.828\n",
            "[3,   110] train_loss: 0.510 train_accuracy: 0.817 test_accuracy: 0.808\n",
            "[3,   115] train_loss: 0.993 train_accuracy: 0.805 test_accuracy: 0.781\n",
            "[3,   120] train_loss: 1.039 train_accuracy: 0.846 test_accuracy: 0.830\n",
            "[3,   125] train_loss: 0.652 train_accuracy: 0.825 test_accuracy: 0.826\n",
            "[3,   130] train_loss: 0.536 train_accuracy: 0.799 test_accuracy: 0.810\n",
            "[3,   135] train_loss: 0.342 train_accuracy: 0.772 test_accuracy: 0.782\n",
            "[3,   140] train_loss: 0.875 train_accuracy: 0.832 test_accuracy: 0.830\n",
            "[3,   145] train_loss: 0.304 train_accuracy: 0.836 test_accuracy: 0.810\n",
            "[3,   150] train_loss: 0.621 train_accuracy: 0.808 test_accuracy: 0.783\n",
            "[3,   155] train_loss: 0.455 train_accuracy: 0.803 test_accuracy: 0.775\n",
            "[3,   160] train_loss: 0.576 train_accuracy: 0.819 test_accuracy: 0.790\n",
            "[3,   165] train_loss: 0.487 train_accuracy: 0.802 test_accuracy: 0.781\n",
            "[3,   170] train_loss: 0.977 train_accuracy: 0.813 test_accuracy: 0.780\n",
            "[3,   175] train_loss: 0.273 train_accuracy: 0.793 test_accuracy: 0.767\n",
            "[3,   180] train_loss: 0.620 train_accuracy: 0.819 test_accuracy: 0.796\n",
            "[3,   185] train_loss: 0.285 train_accuracy: 0.813 test_accuracy: 0.795\n",
            "[3,   190] train_loss: 0.827 train_accuracy: 0.834 test_accuracy: 0.830\n",
            "[3,   195] train_loss: 0.392 train_accuracy: 0.848 test_accuracy: 0.832\n",
            "[3,   200] train_loss: 0.574 train_accuracy: 0.812 test_accuracy: 0.787\n",
            "[3,   205] train_loss: 0.834 train_accuracy: 0.825 test_accuracy: 0.807\n",
            "[3,   210] train_loss: 0.240 train_accuracy: 0.812 test_accuracy: 0.799\n",
            "[3,   215] train_loss: 0.264 train_accuracy: 0.800 test_accuracy: 0.791\n",
            "[3,   220] train_loss: 0.522 train_accuracy: 0.806 test_accuracy: 0.815\n",
            "[3,   225] train_loss: 0.670 train_accuracy: 0.824 test_accuracy: 0.840\n",
            "[3,   230] train_loss: 0.429 train_accuracy: 0.815 test_accuracy: 0.843\n",
            "[3,   235] train_loss: 0.849 train_accuracy: 0.843 test_accuracy: 0.847\n",
            "[3,   240] train_loss: 0.462 train_accuracy: 0.851 test_accuracy: 0.849\n",
            "[3,   245] train_loss: 0.392 train_accuracy: 0.861 test_accuracy: 0.851\n",
            "[3,   250] train_loss: 0.626 train_accuracy: 0.835 test_accuracy: 0.817\n",
            "[3,   255] train_loss: 0.421 train_accuracy: 0.837 test_accuracy: 0.813\n",
            "[3,   260] train_loss: 0.439 train_accuracy: 0.839 test_accuracy: 0.815\n",
            "[3,   265] train_loss: 0.238 train_accuracy: 0.860 test_accuracy: 0.841\n",
            "[3,   270] train_loss: 0.529 train_accuracy: 0.856 test_accuracy: 0.844\n",
            "[3,   275] train_loss: 0.763 train_accuracy: 0.847 test_accuracy: 0.831\n",
            "[3,   280] train_loss: 0.555 train_accuracy: 0.831 test_accuracy: 0.805\n",
            "[3,   285] train_loss: 0.498 train_accuracy: 0.803 test_accuracy: 0.778\n",
            "[3,   290] train_loss: 0.306 train_accuracy: 0.807 test_accuracy: 0.791\n",
            "[3,   295] train_loss: 0.602 train_accuracy: 0.830 test_accuracy: 0.828\n",
            "[3,   300] train_loss: 0.599 train_accuracy: 0.845 test_accuracy: 0.839\n",
            "[3,   305] train_loss: 0.475 train_accuracy: 0.833 test_accuracy: 0.830\n",
            "[3,   310] train_loss: 0.593 train_accuracy: 0.815 test_accuracy: 0.791\n",
            "[3,   315] train_loss: 0.655 train_accuracy: 0.824 test_accuracy: 0.796\n",
            "[3,   320] train_loss: 0.345 train_accuracy: 0.856 test_accuracy: 0.843\n",
            "[3,   325] train_loss: 0.719 train_accuracy: 0.834 test_accuracy: 0.816\n",
            "[3,   330] train_loss: 0.816 train_accuracy: 0.824 test_accuracy: 0.816\n",
            "[3,   335] train_loss: 0.328 train_accuracy: 0.830 test_accuracy: 0.813\n",
            "[3,   340] train_loss: 0.308 train_accuracy: 0.818 test_accuracy: 0.797\n",
            "[3,   345] train_loss: 0.552 train_accuracy: 0.796 test_accuracy: 0.770\n",
            "[3,   350] train_loss: 0.569 train_accuracy: 0.800 test_accuracy: 0.781\n",
            "[3,   355] train_loss: 1.302 train_accuracy: 0.828 test_accuracy: 0.824\n",
            "[3,   360] train_loss: 0.254 train_accuracy: 0.837 test_accuracy: 0.840\n",
            "[3,   365] train_loss: 0.589 train_accuracy: 0.833 test_accuracy: 0.838\n",
            "[3,   370] train_loss: 0.528 train_accuracy: 0.863 test_accuracy: 0.851\n",
            "[3,   375] train_loss: 0.444 train_accuracy: 0.855 test_accuracy: 0.825\n",
            "[3,   380] train_loss: 0.456 train_accuracy: 0.830 test_accuracy: 0.793\n",
            "[3,   385] train_loss: 0.856 train_accuracy: 0.848 test_accuracy: 0.810\n",
            "[3,   390] train_loss: 0.411 train_accuracy: 0.816 test_accuracy: 0.776\n",
            "[3,   395] train_loss: 0.152 train_accuracy: 0.788 test_accuracy: 0.749\n",
            "[3,   400] train_loss: 0.984 train_accuracy: 0.794 test_accuracy: 0.752\n",
            "[3,   405] train_loss: 0.703 train_accuracy: 0.800 test_accuracy: 0.759\n",
            "[3,   410] train_loss: 0.260 train_accuracy: 0.788 test_accuracy: 0.757\n",
            "[3,   415] train_loss: 0.601 train_accuracy: 0.806 test_accuracy: 0.778\n",
            "[3,   420] train_loss: 0.510 train_accuracy: 0.823 test_accuracy: 0.812\n",
            "[3,   425] train_loss: 0.409 train_accuracy: 0.810 test_accuracy: 0.794\n",
            "[3,   430] train_loss: 0.832 train_accuracy: 0.794 test_accuracy: 0.794\n",
            "[3,   435] train_loss: 0.893 train_accuracy: 0.790 test_accuracy: 0.783\n",
            "[3,   440] train_loss: 0.625 train_accuracy: 0.800 test_accuracy: 0.791\n",
            "[3,   445] train_loss: 0.736 train_accuracy: 0.836 test_accuracy: 0.830\n",
            "[3,   450] train_loss: 0.832 train_accuracy: 0.858 test_accuracy: 0.851\n",
            "[3,   455] train_loss: 0.524 train_accuracy: 0.862 test_accuracy: 0.847\n",
            "[3,   460] train_loss: 0.564 train_accuracy: 0.856 test_accuracy: 0.836\n",
            "[3,   465] train_loss: 0.313 train_accuracy: 0.837 test_accuracy: 0.808\n",
            "[3,   470] train_loss: 0.379 train_accuracy: 0.828 test_accuracy: 0.802\n",
            "[3,   475] train_loss: 0.499 train_accuracy: 0.803 test_accuracy: 0.784\n",
            "[3,   480] train_loss: 0.666 train_accuracy: 0.804 test_accuracy: 0.793\n",
            "[3,   485] train_loss: 0.325 train_accuracy: 0.807 test_accuracy: 0.783\n",
            "[3,   490] train_loss: 0.683 train_accuracy: 0.806 test_accuracy: 0.783\n",
            "[3,   495] train_loss: 1.036 train_accuracy: 0.844 test_accuracy: 0.826\n",
            "[3,   500] train_loss: 0.373 train_accuracy: 0.843 test_accuracy: 0.819\n",
            "[3,   505] train_loss: 0.481 train_accuracy: 0.815 test_accuracy: 0.775\n",
            "[3,   510] train_loss: 0.410 train_accuracy: 0.805 test_accuracy: 0.774\n",
            "[3,   515] train_loss: 1.235 train_accuracy: 0.797 test_accuracy: 0.780\n",
            "[3,   520] train_loss: 1.019 train_accuracy: 0.818 test_accuracy: 0.807\n",
            "[3,   525] train_loss: 0.469 train_accuracy: 0.790 test_accuracy: 0.784\n",
            "[3,   530] train_loss: 0.690 train_accuracy: 0.816 test_accuracy: 0.829\n",
            "[3,   535] train_loss: 1.170 train_accuracy: 0.832 test_accuracy: 0.842\n",
            "[3,   540] train_loss: 0.562 train_accuracy: 0.834 test_accuracy: 0.854\n",
            "[3,   545] train_loss: 0.778 train_accuracy: 0.779 test_accuracy: 0.807\n",
            "[3,   550] train_loss: 0.395 train_accuracy: 0.765 test_accuracy: 0.764\n",
            "[3,   555] train_loss: 0.632 train_accuracy: 0.811 test_accuracy: 0.792\n",
            "[3,   560] train_loss: 0.302 train_accuracy: 0.813 test_accuracy: 0.784\n",
            "[3,   565] train_loss: 0.831 train_accuracy: 0.828 test_accuracy: 0.803\n",
            "[3,   570] train_loss: 0.574 train_accuracy: 0.805 test_accuracy: 0.820\n",
            "[3,   575] train_loss: 0.694 train_accuracy: 0.814 test_accuracy: 0.820\n",
            "[3,   580] train_loss: 0.218 train_accuracy: 0.790 test_accuracy: 0.785\n",
            "[3,   585] train_loss: 0.713 train_accuracy: 0.801 test_accuracy: 0.791\n",
            "[3,   590] train_loss: 1.030 train_accuracy: 0.821 test_accuracy: 0.793\n",
            "[3,   595] train_loss: 0.887 train_accuracy: 0.806 test_accuracy: 0.794\n",
            "[3,   600] train_loss: 0.520 train_accuracy: 0.789 test_accuracy: 0.777\n",
            "[3,   605] train_loss: 0.503 train_accuracy: 0.756 test_accuracy: 0.750\n",
            "[3,   610] train_loss: 0.556 train_accuracy: 0.824 test_accuracy: 0.819\n",
            "[3,   615] train_loss: 0.613 train_accuracy: 0.833 test_accuracy: 0.823\n",
            "[3,   620] train_loss: 0.393 train_accuracy: 0.821 test_accuracy: 0.809\n",
            "[3,   625] train_loss: 0.478 train_accuracy: 0.801 test_accuracy: 0.820\n",
            "[3,   630] train_loss: 0.604 train_accuracy: 0.825 test_accuracy: 0.846\n",
            "[3,   635] train_loss: 0.241 train_accuracy: 0.801 test_accuracy: 0.834\n",
            "[3,   640] train_loss: 0.524 train_accuracy: 0.793 test_accuracy: 0.819\n",
            "[3,   645] train_loss: 0.645 train_accuracy: 0.804 test_accuracy: 0.803\n",
            "[3,   650] train_loss: 1.143 train_accuracy: 0.818 test_accuracy: 0.802\n",
            "[3,   655] train_loss: 1.087 train_accuracy: 0.845 test_accuracy: 0.837\n",
            "[3,   660] train_loss: 0.569 train_accuracy: 0.803 test_accuracy: 0.793\n",
            "[3,   665] train_loss: 0.661 train_accuracy: 0.822 test_accuracy: 0.810\n",
            "[3,   670] train_loss: 0.984 train_accuracy: 0.795 test_accuracy: 0.788\n",
            "[3,   675] train_loss: 1.000 train_accuracy: 0.847 test_accuracy: 0.830\n",
            "[3,   680] train_loss: 0.514 train_accuracy: 0.837 test_accuracy: 0.810\n",
            "[3,   685] train_loss: 0.507 train_accuracy: 0.811 test_accuracy: 0.790\n",
            "[3,   690] train_loss: 0.851 train_accuracy: 0.804 test_accuracy: 0.794\n",
            "[3,   695] train_loss: 0.887 train_accuracy: 0.827 test_accuracy: 0.820\n",
            "[3,   700] train_loss: 0.334 train_accuracy: 0.813 test_accuracy: 0.804\n",
            "[3,   705] train_loss: 0.324 train_accuracy: 0.816 test_accuracy: 0.795\n",
            "[3,   710] train_loss: 0.204 train_accuracy: 0.809 test_accuracy: 0.776\n",
            "[3,   715] train_loss: 0.647 train_accuracy: 0.830 test_accuracy: 0.795\n",
            "[3,   720] train_loss: 1.181 train_accuracy: 0.864 test_accuracy: 0.841\n",
            "[3,   725] train_loss: 1.057 train_accuracy: 0.791 test_accuracy: 0.771\n",
            "[3,   730] train_loss: 0.469 train_accuracy: 0.748 test_accuracy: 0.732\n",
            "[3,   735] train_loss: 0.966 train_accuracy: 0.827 test_accuracy: 0.824\n",
            "[3,   740] train_loss: 0.379 train_accuracy: 0.842 test_accuracy: 0.840\n",
            "[3,   745] train_loss: 0.418 train_accuracy: 0.839 test_accuracy: 0.828\n",
            "[3,   750] train_loss: 0.514 train_accuracy: 0.836 test_accuracy: 0.818\n",
            "[3,   755] train_loss: 0.495 train_accuracy: 0.847 test_accuracy: 0.825\n",
            "[3,   760] train_loss: 0.606 train_accuracy: 0.862 test_accuracy: 0.839\n",
            "[3,   765] train_loss: 0.236 train_accuracy: 0.845 test_accuracy: 0.824\n",
            "[3,   770] train_loss: 0.554 train_accuracy: 0.868 test_accuracy: 0.850\n",
            "[3,   775] train_loss: 0.525 train_accuracy: 0.869 test_accuracy: 0.847\n",
            "[3,   780] train_loss: 0.520 train_accuracy: 0.858 test_accuracy: 0.834\n",
            "[3,   785] train_loss: 0.693 train_accuracy: 0.834 test_accuracy: 0.804\n",
            "[3,   790] train_loss: 0.487 train_accuracy: 0.812 test_accuracy: 0.776\n",
            "[3,   795] train_loss: 1.364 train_accuracy: 0.822 test_accuracy: 0.798\n",
            "[3,   800] train_loss: 0.500 train_accuracy: 0.825 test_accuracy: 0.814\n",
            "[3,   805] train_loss: 0.528 train_accuracy: 0.825 test_accuracy: 0.811\n",
            "[3,   810] train_loss: 0.554 train_accuracy: 0.813 test_accuracy: 0.787\n",
            "[3,   815] train_loss: 0.533 train_accuracy: 0.794 test_accuracy: 0.760\n",
            "[3,   820] train_loss: 0.524 train_accuracy: 0.778 test_accuracy: 0.745\n",
            "[3,   825] train_loss: 1.084 train_accuracy: 0.840 test_accuracy: 0.815\n",
            "[3,   830] train_loss: 0.320 train_accuracy: 0.838 test_accuracy: 0.807\n",
            "[3,   835] train_loss: 0.384 train_accuracy: 0.804 test_accuracy: 0.775\n",
            "[3,   840] train_loss: 0.394 train_accuracy: 0.833 test_accuracy: 0.811\n",
            "[3,   845] train_loss: 0.363 train_accuracy: 0.833 test_accuracy: 0.816\n",
            "[3,   850] train_loss: 1.006 train_accuracy: 0.810 test_accuracy: 0.797\n",
            "[3,   855] train_loss: 0.476 train_accuracy: 0.847 test_accuracy: 0.837\n",
            "[3,   860] train_loss: 1.555 train_accuracy: 0.850 test_accuracy: 0.827\n",
            "[3,   865] train_loss: 0.874 train_accuracy: 0.799 test_accuracy: 0.774\n",
            "[3,   870] train_loss: 0.409 train_accuracy: 0.740 test_accuracy: 0.723\n",
            "[3,   875] train_loss: 0.878 train_accuracy: 0.818 test_accuracy: 0.804\n",
            "[3,   880] train_loss: 0.551 train_accuracy: 0.832 test_accuracy: 0.834\n",
            "[3,   885] train_loss: 0.282 train_accuracy: 0.783 test_accuracy: 0.790\n",
            "[3,   890] train_loss: 0.526 train_accuracy: 0.834 test_accuracy: 0.832\n",
            "[3,   895] train_loss: 0.510 train_accuracy: 0.824 test_accuracy: 0.819\n",
            "[3,   900] train_loss: 0.696 train_accuracy: 0.816 test_accuracy: 0.805\n",
            "[3,   905] train_loss: 0.564 train_accuracy: 0.748 test_accuracy: 0.724\n",
            "[3,   910] train_loss: 0.585 train_accuracy: 0.773 test_accuracy: 0.737\n",
            "[3,   915] train_loss: 1.052 train_accuracy: 0.801 test_accuracy: 0.767\n",
            "[3,   920] train_loss: 0.561 train_accuracy: 0.776 test_accuracy: 0.742\n",
            "[3,   925] train_loss: 0.497 train_accuracy: 0.817 test_accuracy: 0.784\n",
            "[3,   930] train_loss: 1.095 train_accuracy: 0.780 test_accuracy: 0.782\n",
            "[3,   935] train_loss: 1.375 train_accuracy: 0.732 test_accuracy: 0.714\n",
            "[3,   940] train_loss: 1.245 train_accuracy: 0.785 test_accuracy: 0.748\n",
            "[3,   945] train_loss: 0.456 train_accuracy: 0.830 test_accuracy: 0.803\n",
            "[3,   950] train_loss: 0.829 train_accuracy: 0.816 test_accuracy: 0.811\n",
            "[3,   955] train_loss: 0.644 train_accuracy: 0.811 test_accuracy: 0.801\n",
            "[3,   960] train_loss: 1.103 train_accuracy: 0.859 test_accuracy: 0.833\n",
            "[3,   965] train_loss: 0.864 train_accuracy: 0.825 test_accuracy: 0.810\n",
            "[3,   970] train_loss: 0.485 train_accuracy: 0.843 test_accuracy: 0.828\n",
            "[3,   975] train_loss: 0.523 train_accuracy: 0.867 test_accuracy: 0.852\n",
            "[3,   980] train_loss: 0.319 train_accuracy: 0.867 test_accuracy: 0.846\n",
            "[3,   985] train_loss: 0.261 train_accuracy: 0.884 test_accuracy: 0.860\n",
            "[3,   990] train_loss: 0.150 train_accuracy: 0.879 test_accuracy: 0.853\n",
            "[3,   995] train_loss: 0.435 train_accuracy: 0.873 test_accuracy: 0.843\n",
            "[3,  1000] train_loss: 0.725 train_accuracy: 0.865 test_accuracy: 0.847\n",
            "[3,  1005] train_loss: 0.442 train_accuracy: 0.842 test_accuracy: 0.835\n",
            "[3,  1010] train_loss: 0.286 train_accuracy: 0.803 test_accuracy: 0.800\n",
            "[3,  1015] train_loss: 0.685 train_accuracy: 0.792 test_accuracy: 0.778\n",
            "[3,  1020] train_loss: 0.403 train_accuracy: 0.819 test_accuracy: 0.793\n",
            "[3,  1025] train_loss: 0.736 train_accuracy: 0.839 test_accuracy: 0.827\n",
            "[3,  1030] train_loss: 0.180 train_accuracy: 0.860 test_accuracy: 0.845\n",
            "[3,  1035] train_loss: 0.451 train_accuracy: 0.850 test_accuracy: 0.839\n",
            "[3,  1040] train_loss: 0.606 train_accuracy: 0.846 test_accuracy: 0.818\n",
            "[3,  1045] train_loss: 0.583 train_accuracy: 0.799 test_accuracy: 0.788\n",
            "[3,  1050] train_loss: 0.657 train_accuracy: 0.823 test_accuracy: 0.818\n",
            "[3,  1055] train_loss: 0.558 train_accuracy: 0.868 test_accuracy: 0.849\n",
            "[3,  1060] train_loss: 0.286 train_accuracy: 0.872 test_accuracy: 0.848\n",
            "[3,  1065] train_loss: 0.155 train_accuracy: 0.844 test_accuracy: 0.819\n",
            "[3,  1070] train_loss: 0.467 train_accuracy: 0.820 test_accuracy: 0.794\n",
            "[3,  1075] train_loss: 0.645 train_accuracy: 0.806 test_accuracy: 0.780\n",
            "[3,  1080] train_loss: 0.769 train_accuracy: 0.818 test_accuracy: 0.792\n",
            "[3,  1085] train_loss: 0.235 train_accuracy: 0.839 test_accuracy: 0.825\n",
            "[3,  1090] train_loss: 0.665 train_accuracy: 0.838 test_accuracy: 0.831\n",
            "[3,  1095] train_loss: 0.298 train_accuracy: 0.793 test_accuracy: 0.805\n",
            "[3,  1100] train_loss: 0.272 train_accuracy: 0.796 test_accuracy: 0.808\n",
            "[3,  1105] train_loss: 0.599 train_accuracy: 0.801 test_accuracy: 0.805\n",
            "[3,  1110] train_loss: 0.898 train_accuracy: 0.820 test_accuracy: 0.827\n",
            "[3,  1115] train_loss: 0.310 train_accuracy: 0.808 test_accuracy: 0.809\n",
            "[3,  1120] train_loss: 0.941 train_accuracy: 0.817 test_accuracy: 0.792\n",
            "[3,  1125] train_loss: 0.410 train_accuracy: 0.840 test_accuracy: 0.812\n",
            "[3,  1130] train_loss: 0.620 train_accuracy: 0.825 test_accuracy: 0.795\n",
            "[3,  1135] train_loss: 0.357 train_accuracy: 0.823 test_accuracy: 0.792\n",
            "[3,  1140] train_loss: 1.011 train_accuracy: 0.826 test_accuracy: 0.800\n",
            "[3,  1145] train_loss: 0.140 train_accuracy: 0.785 test_accuracy: 0.756\n",
            "[3,  1150] train_loss: 1.270 train_accuracy: 0.810 test_accuracy: 0.774\n",
            "[3,  1155] train_loss: 0.392 train_accuracy: 0.813 test_accuracy: 0.801\n",
            "[3,  1160] train_loss: 0.508 train_accuracy: 0.774 test_accuracy: 0.766\n",
            "[3,  1165] train_loss: 0.753 train_accuracy: 0.746 test_accuracy: 0.759\n",
            "[3,  1170] train_loss: 0.751 train_accuracy: 0.793 test_accuracy: 0.809\n",
            "[3,  1175] train_loss: 0.700 train_accuracy: 0.835 test_accuracy: 0.829\n",
            "[3,  1180] train_loss: 0.799 train_accuracy: 0.774 test_accuracy: 0.750\n",
            "[3,  1185] train_loss: 0.514 train_accuracy: 0.784 test_accuracy: 0.747\n",
            "[3,  1190] train_loss: 1.019 train_accuracy: 0.822 test_accuracy: 0.780\n",
            "[3,  1195] train_loss: 0.611 train_accuracy: 0.783 test_accuracy: 0.745\n",
            "[3,  1200] train_loss: 0.486 train_accuracy: 0.831 test_accuracy: 0.804\n",
            "[3,  1205] train_loss: 0.323 train_accuracy: 0.831 test_accuracy: 0.801\n",
            "[3,  1210] train_loss: 0.737 train_accuracy: 0.855 test_accuracy: 0.821\n",
            "[3,  1215] train_loss: 0.358 train_accuracy: 0.865 test_accuracy: 0.852\n",
            "[3,  1220] train_loss: 0.494 train_accuracy: 0.844 test_accuracy: 0.827\n",
            "[3,  1225] train_loss: 0.287 train_accuracy: 0.841 test_accuracy: 0.813\n",
            "[3,  1230] train_loss: 0.350 train_accuracy: 0.849 test_accuracy: 0.822\n",
            "[3,  1235] train_loss: 0.473 train_accuracy: 0.864 test_accuracy: 0.830\n",
            "[3,  1240] train_loss: 0.597 train_accuracy: 0.874 test_accuracy: 0.841\n",
            "[3,  1245] train_loss: 0.886 train_accuracy: 0.869 test_accuracy: 0.842\n",
            "[3,  1250] train_loss: 0.369 train_accuracy: 0.839 test_accuracy: 0.814\n",
            "[3,  1255] train_loss: 0.289 train_accuracy: 0.824 test_accuracy: 0.812\n",
            "[3,  1260] train_loss: 0.658 train_accuracy: 0.835 test_accuracy: 0.833\n",
            "[3,  1265] train_loss: 0.641 train_accuracy: 0.841 test_accuracy: 0.829\n",
            "[3,  1270] train_loss: 0.340 train_accuracy: 0.811 test_accuracy: 0.806\n",
            "[3,  1275] train_loss: 0.925 train_accuracy: 0.838 test_accuracy: 0.829\n",
            "[3,  1280] train_loss: 0.576 train_accuracy: 0.848 test_accuracy: 0.826\n",
            "[3,  1285] train_loss: 0.623 train_accuracy: 0.808 test_accuracy: 0.793\n",
            "[3,  1290] train_loss: 0.518 train_accuracy: 0.843 test_accuracy: 0.829\n",
            "[3,  1295] train_loss: 0.690 train_accuracy: 0.877 test_accuracy: 0.853\n",
            "[3,  1300] train_loss: 0.234 train_accuracy: 0.853 test_accuracy: 0.831\n",
            "[3,  1305] train_loss: 0.650 train_accuracy: 0.853 test_accuracy: 0.825\n",
            "[3,  1310] train_loss: 0.419 train_accuracy: 0.851 test_accuracy: 0.817\n",
            "[3,  1315] train_loss: 0.308 train_accuracy: 0.850 test_accuracy: 0.818\n",
            "[3,  1320] train_loss: 1.201 train_accuracy: 0.850 test_accuracy: 0.815\n",
            "[3,  1325] train_loss: 0.524 train_accuracy: 0.869 test_accuracy: 0.836\n",
            "[3,  1330] train_loss: 0.333 train_accuracy: 0.871 test_accuracy: 0.848\n",
            "[3,  1335] train_loss: 0.186 train_accuracy: 0.865 test_accuracy: 0.834\n",
            "[3,  1340] train_loss: 0.428 train_accuracy: 0.860 test_accuracy: 0.827\n",
            "[3,  1345] train_loss: 0.375 train_accuracy: 0.838 test_accuracy: 0.812\n",
            "[3,  1350] train_loss: 0.399 train_accuracy: 0.835 test_accuracy: 0.800\n",
            "[3,  1355] train_loss: 0.265 train_accuracy: 0.839 test_accuracy: 0.804\n",
            "[3,  1360] train_loss: 0.476 train_accuracy: 0.844 test_accuracy: 0.810\n",
            "[3,  1365] train_loss: 0.516 train_accuracy: 0.860 test_accuracy: 0.824\n",
            "[3,  1370] train_loss: 0.351 train_accuracy: 0.866 test_accuracy: 0.832\n",
            "[3,  1375] train_loss: 0.377 train_accuracy: 0.859 test_accuracy: 0.830\n",
            "[3,  1380] train_loss: 0.586 train_accuracy: 0.867 test_accuracy: 0.837\n",
            "[3,  1385] train_loss: 0.142 train_accuracy: 0.861 test_accuracy: 0.826\n",
            "[3,  1390] train_loss: 0.661 train_accuracy: 0.858 test_accuracy: 0.836\n",
            "[3,  1395] train_loss: 0.497 train_accuracy: 0.848 test_accuracy: 0.838\n",
            "[3,  1400] train_loss: 1.429 train_accuracy: 0.849 test_accuracy: 0.837\n",
            "[3,  1405] train_loss: 0.385 train_accuracy: 0.795 test_accuracy: 0.793\n",
            "[3,  1410] train_loss: 0.354 train_accuracy: 0.793 test_accuracy: 0.795\n",
            "[3,  1415] train_loss: 0.715 train_accuracy: 0.829 test_accuracy: 0.828\n",
            "[3,  1420] train_loss: 0.296 train_accuracy: 0.860 test_accuracy: 0.837\n",
            "[3,  1425] train_loss: 0.611 train_accuracy: 0.874 test_accuracy: 0.843\n",
            "[3,  1430] train_loss: 0.305 train_accuracy: 0.876 test_accuracy: 0.848\n",
            "[3,  1435] train_loss: 0.429 train_accuracy: 0.884 test_accuracy: 0.860\n",
            "[3,  1440] train_loss: 0.581 train_accuracy: 0.867 test_accuracy: 0.853\n",
            "[3,  1445] train_loss: 0.373 train_accuracy: 0.861 test_accuracy: 0.846\n",
            "[3,  1450] train_loss: 0.211 train_accuracy: 0.849 test_accuracy: 0.830\n",
            "[3,  1455] train_loss: 0.352 train_accuracy: 0.844 test_accuracy: 0.825\n",
            "[3,  1460] train_loss: 0.747 train_accuracy: 0.850 test_accuracy: 0.844\n",
            "[3,  1465] train_loss: 0.959 train_accuracy: 0.828 test_accuracy: 0.819\n",
            "[3,  1470] train_loss: 0.629 train_accuracy: 0.860 test_accuracy: 0.833\n",
            "[3,  1475] train_loss: 0.399 train_accuracy: 0.849 test_accuracy: 0.808\n",
            "[3,  1480] train_loss: 0.428 train_accuracy: 0.830 test_accuracy: 0.789\n",
            "[3,  1485] train_loss: 0.944 train_accuracy: 0.814 test_accuracy: 0.772\n",
            "[3,  1490] train_loss: 0.722 train_accuracy: 0.828 test_accuracy: 0.789\n",
            "[3,  1495] train_loss: 0.315 train_accuracy: 0.830 test_accuracy: 0.803\n",
            "[3,  1500] train_loss: 0.265 train_accuracy: 0.808 test_accuracy: 0.785\n",
            "[3,  1505] train_loss: 0.815 train_accuracy: 0.854 test_accuracy: 0.822\n",
            "[3,  1510] train_loss: 0.329 train_accuracy: 0.867 test_accuracy: 0.832\n",
            "[3,  1515] train_loss: 0.203 train_accuracy: 0.839 test_accuracy: 0.812\n",
            "[3,  1520] train_loss: 0.619 train_accuracy: 0.792 test_accuracy: 0.782\n",
            "[3,  1525] train_loss: 0.555 train_accuracy: 0.779 test_accuracy: 0.765\n",
            "[3,  1530] train_loss: 0.812 train_accuracy: 0.793 test_accuracy: 0.783\n",
            "[3,  1535] train_loss: 0.502 train_accuracy: 0.798 test_accuracy: 0.776\n",
            "[3,  1540] train_loss: 0.844 train_accuracy: 0.837 test_accuracy: 0.823\n",
            "[3,  1545] train_loss: 0.427 train_accuracy: 0.812 test_accuracy: 0.815\n",
            "[3,  1550] train_loss: 0.642 train_accuracy: 0.806 test_accuracy: 0.808\n",
            "[3,  1555] train_loss: 1.074 train_accuracy: 0.826 test_accuracy: 0.813\n",
            "[3,  1560] train_loss: 0.885 train_accuracy: 0.810 test_accuracy: 0.789\n",
            "[3,  1565] train_loss: 0.808 train_accuracy: 0.810 test_accuracy: 0.785\n",
            "[3,  1570] train_loss: 0.712 train_accuracy: 0.832 test_accuracy: 0.814\n",
            "[3,  1575] train_loss: 0.708 train_accuracy: 0.824 test_accuracy: 0.801\n",
            "[3,  1580] train_loss: 0.674 train_accuracy: 0.791 test_accuracy: 0.790\n",
            "[3,  1585] train_loss: 0.854 train_accuracy: 0.824 test_accuracy: 0.820\n",
            "[3,  1590] train_loss: 0.726 train_accuracy: 0.831 test_accuracy: 0.847\n",
            "[3,  1595] train_loss: 0.449 train_accuracy: 0.821 test_accuracy: 0.846\n",
            "[3,  1600] train_loss: 0.279 train_accuracy: 0.824 test_accuracy: 0.850\n",
            "[3,  1605] train_loss: 0.201 train_accuracy: 0.832 test_accuracy: 0.858\n",
            "[3,  1610] train_loss: 0.453 train_accuracy: 0.839 test_accuracy: 0.862\n",
            "[3,  1615] train_loss: 0.683 train_accuracy: 0.855 test_accuracy: 0.871\n",
            "[3,  1620] train_loss: 0.453 train_accuracy: 0.852 test_accuracy: 0.863\n",
            "[3,  1625] train_loss: 0.540 train_accuracy: 0.861 test_accuracy: 0.856\n",
            "[3,  1630] train_loss: 0.588 train_accuracy: 0.885 test_accuracy: 0.866\n",
            "[3,  1635] train_loss: 0.661 train_accuracy: 0.893 test_accuracy: 0.859\n",
            "[3,  1640] train_loss: 0.380 train_accuracy: 0.876 test_accuracy: 0.842\n",
            "[3,  1645] train_loss: 0.246 train_accuracy: 0.862 test_accuracy: 0.824\n",
            "[3,  1650] train_loss: 0.568 train_accuracy: 0.857 test_accuracy: 0.823\n",
            "[3,  1655] train_loss: 0.342 train_accuracy: 0.854 test_accuracy: 0.829\n",
            "[3,  1660] train_loss: 0.505 train_accuracy: 0.854 test_accuracy: 0.839\n",
            "[3,  1665] train_loss: 0.556 train_accuracy: 0.849 test_accuracy: 0.827\n",
            "[3,  1670] train_loss: 0.362 train_accuracy: 0.836 test_accuracy: 0.813\n",
            "[3,  1675] train_loss: 0.289 train_accuracy: 0.842 test_accuracy: 0.820\n",
            "[3,  1680] train_loss: 0.655 train_accuracy: 0.842 test_accuracy: 0.823\n",
            "[3,  1685] train_loss: 0.543 train_accuracy: 0.849 test_accuracy: 0.834\n",
            "[3,  1690] train_loss: 0.410 train_accuracy: 0.850 test_accuracy: 0.827\n",
            "[3,  1695] train_loss: 0.356 train_accuracy: 0.849 test_accuracy: 0.820\n",
            "[3,  1700] train_loss: 0.356 train_accuracy: 0.858 test_accuracy: 0.829\n",
            "[3,  1705] train_loss: 0.444 train_accuracy: 0.839 test_accuracy: 0.821\n",
            "[3,  1710] train_loss: 0.684 train_accuracy: 0.828 test_accuracy: 0.805\n",
            "[3,  1715] train_loss: 1.127 train_accuracy: 0.847 test_accuracy: 0.812\n",
            "[3,  1720] train_loss: 0.247 train_accuracy: 0.793 test_accuracy: 0.757\n",
            "[3,  1725] train_loss: 0.683 train_accuracy: 0.799 test_accuracy: 0.770\n",
            "[4,     5] train_loss: 0.596 train_accuracy: 0.808 test_accuracy: 0.775\n",
            "[4,    10] train_loss: 0.480 train_accuracy: 0.842 test_accuracy: 0.807\n",
            "[4,    15] train_loss: 1.022 train_accuracy: 0.864 test_accuracy: 0.840\n",
            "[4,    20] train_loss: 0.329 train_accuracy: 0.869 test_accuracy: 0.843\n",
            "[4,    25] train_loss: 0.326 train_accuracy: 0.869 test_accuracy: 0.841\n",
            "[4,    30] train_loss: 0.461 train_accuracy: 0.868 test_accuracy: 0.832\n",
            "[4,    35] train_loss: 0.207 train_accuracy: 0.858 test_accuracy: 0.826\n",
            "[4,    40] train_loss: 0.403 train_accuracy: 0.832 test_accuracy: 0.809\n",
            "[4,    45] train_loss: 0.395 train_accuracy: 0.814 test_accuracy: 0.792\n",
            "[4,    50] train_loss: 0.368 train_accuracy: 0.825 test_accuracy: 0.799\n",
            "[4,    55] train_loss: 0.358 train_accuracy: 0.842 test_accuracy: 0.812\n",
            "[4,    60] train_loss: 0.927 train_accuracy: 0.870 test_accuracy: 0.847\n",
            "[4,    65] train_loss: 0.657 train_accuracy: 0.873 test_accuracy: 0.842\n",
            "[4,    70] train_loss: 0.335 train_accuracy: 0.832 test_accuracy: 0.784\n",
            "[4,    75] train_loss: 0.744 train_accuracy: 0.795 test_accuracy: 0.751\n",
            "[4,    80] train_loss: 0.589 train_accuracy: 0.796 test_accuracy: 0.758\n",
            "[4,    85] train_loss: 0.761 train_accuracy: 0.842 test_accuracy: 0.808\n",
            "[4,    90] train_loss: 0.278 train_accuracy: 0.844 test_accuracy: 0.824\n",
            "[4,    95] train_loss: 0.581 train_accuracy: 0.851 test_accuracy: 0.836\n",
            "[4,   100] train_loss: 0.370 train_accuracy: 0.868 test_accuracy: 0.860\n",
            "[4,   105] train_loss: 0.312 train_accuracy: 0.867 test_accuracy: 0.862\n",
            "[4,   110] train_loss: 0.416 train_accuracy: 0.862 test_accuracy: 0.860\n",
            "[4,   115] train_loss: 0.168 train_accuracy: 0.844 test_accuracy: 0.845\n",
            "[4,   120] train_loss: 0.423 train_accuracy: 0.836 test_accuracy: 0.833\n",
            "[4,   125] train_loss: 0.132 train_accuracy: 0.826 test_accuracy: 0.814\n",
            "[4,   130] train_loss: 1.192 train_accuracy: 0.867 test_accuracy: 0.841\n",
            "[4,   135] train_loss: 0.929 train_accuracy: 0.842 test_accuracy: 0.823\n",
            "[4,   140] train_loss: 0.480 train_accuracy: 0.857 test_accuracy: 0.837\n",
            "[4,   145] train_loss: 0.405 train_accuracy: 0.831 test_accuracy: 0.817\n",
            "[4,   150] train_loss: 0.316 train_accuracy: 0.824 test_accuracy: 0.809\n",
            "[4,   155] train_loss: 0.358 train_accuracy: 0.823 test_accuracy: 0.807\n",
            "[4,   160] train_loss: 0.498 train_accuracy: 0.839 test_accuracy: 0.822\n",
            "[4,   165] train_loss: 0.347 train_accuracy: 0.858 test_accuracy: 0.834\n",
            "[4,   170] train_loss: 0.251 train_accuracy: 0.872 test_accuracy: 0.846\n",
            "[4,   175] train_loss: 0.337 train_accuracy: 0.869 test_accuracy: 0.841\n",
            "[4,   180] train_loss: 0.118 train_accuracy: 0.864 test_accuracy: 0.842\n",
            "[4,   185] train_loss: 0.240 train_accuracy: 0.865 test_accuracy: 0.844\n",
            "[4,   190] train_loss: 0.320 train_accuracy: 0.869 test_accuracy: 0.848\n",
            "[4,   195] train_loss: 0.333 train_accuracy: 0.874 test_accuracy: 0.848\n",
            "[4,   200] train_loss: 0.458 train_accuracy: 0.855 test_accuracy: 0.825\n",
            "[4,   205] train_loss: 0.427 train_accuracy: 0.838 test_accuracy: 0.808\n",
            "[4,   210] train_loss: 0.697 train_accuracy: 0.840 test_accuracy: 0.814\n",
            "[4,   215] train_loss: 0.723 train_accuracy: 0.836 test_accuracy: 0.814\n",
            "[4,   220] train_loss: 0.485 train_accuracy: 0.824 test_accuracy: 0.805\n",
            "[4,   225] train_loss: 0.462 train_accuracy: 0.854 test_accuracy: 0.830\n",
            "[4,   230] train_loss: 0.326 train_accuracy: 0.853 test_accuracy: 0.836\n",
            "[4,   235] train_loss: 0.430 train_accuracy: 0.856 test_accuracy: 0.845\n",
            "[4,   240] train_loss: 0.148 train_accuracy: 0.870 test_accuracy: 0.865\n",
            "[4,   245] train_loss: 0.422 train_accuracy: 0.878 test_accuracy: 0.873\n",
            "[4,   250] train_loss: 0.472 train_accuracy: 0.889 test_accuracy: 0.874\n",
            "[4,   255] train_loss: 0.408 train_accuracy: 0.875 test_accuracy: 0.850\n",
            "[4,   260] train_loss: 0.273 train_accuracy: 0.868 test_accuracy: 0.841\n",
            "[4,   265] train_loss: 0.461 train_accuracy: 0.878 test_accuracy: 0.855\n",
            "[4,   270] train_loss: 0.359 train_accuracy: 0.876 test_accuracy: 0.855\n",
            "[4,   275] train_loss: 0.281 train_accuracy: 0.876 test_accuracy: 0.847\n",
            "[4,   280] train_loss: 0.114 train_accuracy: 0.883 test_accuracy: 0.851\n",
            "[4,   285] train_loss: 0.332 train_accuracy: 0.881 test_accuracy: 0.857\n",
            "[4,   290] train_loss: 0.189 train_accuracy: 0.860 test_accuracy: 0.850\n",
            "[4,   295] train_loss: 0.295 train_accuracy: 0.846 test_accuracy: 0.838\n",
            "[4,   300] train_loss: 0.604 train_accuracy: 0.836 test_accuracy: 0.823\n",
            "[4,   305] train_loss: 1.045 train_accuracy: 0.843 test_accuracy: 0.830\n",
            "[4,   310] train_loss: 0.823 train_accuracy: 0.805 test_accuracy: 0.793\n",
            "[4,   315] train_loss: 0.419 train_accuracy: 0.781 test_accuracy: 0.778\n",
            "[4,   320] train_loss: 0.795 train_accuracy: 0.805 test_accuracy: 0.809\n",
            "[4,   325] train_loss: 0.427 train_accuracy: 0.828 test_accuracy: 0.816\n",
            "[4,   330] train_loss: 0.118 train_accuracy: 0.818 test_accuracy: 0.799\n",
            "[4,   335] train_loss: 0.258 train_accuracy: 0.831 test_accuracy: 0.808\n",
            "[4,   340] train_loss: 0.617 train_accuracy: 0.848 test_accuracy: 0.827\n",
            "[4,   345] train_loss: 0.257 train_accuracy: 0.822 test_accuracy: 0.805\n",
            "[4,   350] train_loss: 0.653 train_accuracy: 0.838 test_accuracy: 0.822\n",
            "[4,   355] train_loss: 0.406 train_accuracy: 0.851 test_accuracy: 0.840\n",
            "[4,   360] train_loss: 0.375 train_accuracy: 0.857 test_accuracy: 0.851\n",
            "[4,   365] train_loss: 0.176 train_accuracy: 0.845 test_accuracy: 0.837\n",
            "[4,   370] train_loss: 1.074 train_accuracy: 0.858 test_accuracy: 0.850\n",
            "[4,   375] train_loss: 0.511 train_accuracy: 0.849 test_accuracy: 0.840\n",
            "[4,   380] train_loss: 0.456 train_accuracy: 0.842 test_accuracy: 0.826\n",
            "[4,   385] train_loss: 0.670 train_accuracy: 0.838 test_accuracy: 0.804\n",
            "[4,   390] train_loss: 0.640 train_accuracy: 0.815 test_accuracy: 0.779\n",
            "[4,   395] train_loss: 0.207 train_accuracy: 0.797 test_accuracy: 0.770\n",
            "[4,   400] train_loss: 0.902 train_accuracy: 0.809 test_accuracy: 0.792\n",
            "[4,   405] train_loss: 0.408 train_accuracy: 0.810 test_accuracy: 0.807\n",
            "[4,   410] train_loss: 0.527 train_accuracy: 0.841 test_accuracy: 0.826\n",
            "[4,   415] train_loss: 0.498 train_accuracy: 0.878 test_accuracy: 0.851\n",
            "[4,   420] train_loss: 0.349 train_accuracy: 0.867 test_accuracy: 0.840\n",
            "[4,   425] train_loss: 0.471 train_accuracy: 0.856 test_accuracy: 0.829\n",
            "[4,   430] train_loss: 0.668 train_accuracy: 0.847 test_accuracy: 0.814\n",
            "[4,   435] train_loss: 0.551 train_accuracy: 0.845 test_accuracy: 0.816\n",
            "[4,   440] train_loss: 0.605 train_accuracy: 0.852 test_accuracy: 0.819\n",
            "[4,   445] train_loss: 0.472 train_accuracy: 0.838 test_accuracy: 0.819\n",
            "[4,   450] train_loss: 0.246 train_accuracy: 0.809 test_accuracy: 0.803\n",
            "[4,   455] train_loss: 0.846 train_accuracy: 0.807 test_accuracy: 0.814\n",
            "[4,   460] train_loss: 0.197 train_accuracy: 0.838 test_accuracy: 0.849\n",
            "[4,   465] train_loss: 0.252 train_accuracy: 0.851 test_accuracy: 0.851\n",
            "[4,   470] train_loss: 0.343 train_accuracy: 0.869 test_accuracy: 0.860\n",
            "[4,   475] train_loss: 0.572 train_accuracy: 0.878 test_accuracy: 0.852\n",
            "[4,   480] train_loss: 0.421 train_accuracy: 0.886 test_accuracy: 0.855\n",
            "[4,   485] train_loss: 0.440 train_accuracy: 0.872 test_accuracy: 0.853\n",
            "[4,   490] train_loss: 0.343 train_accuracy: 0.851 test_accuracy: 0.850\n",
            "[4,   495] train_loss: 0.795 train_accuracy: 0.854 test_accuracy: 0.833\n",
            "[4,   500] train_loss: 0.162 train_accuracy: 0.824 test_accuracy: 0.806\n",
            "[4,   505] train_loss: 0.702 train_accuracy: 0.861 test_accuracy: 0.840\n",
            "[4,   510] train_loss: 0.758 train_accuracy: 0.843 test_accuracy: 0.829\n",
            "[4,   515] train_loss: 0.295 train_accuracy: 0.837 test_accuracy: 0.827\n",
            "[4,   520] train_loss: 0.402 train_accuracy: 0.851 test_accuracy: 0.844\n",
            "[4,   525] train_loss: 0.309 train_accuracy: 0.799 test_accuracy: 0.809\n",
            "[4,   530] train_loss: 0.387 train_accuracy: 0.789 test_accuracy: 0.818\n",
            "[4,   535] train_loss: 0.896 train_accuracy: 0.799 test_accuracy: 0.830\n",
            "[4,   540] train_loss: 0.337 train_accuracy: 0.805 test_accuracy: 0.825\n",
            "[4,   545] train_loss: 0.611 train_accuracy: 0.836 test_accuracy: 0.833\n",
            "[4,   550] train_loss: 0.767 train_accuracy: 0.821 test_accuracy: 0.814\n",
            "[4,   555] train_loss: 0.484 train_accuracy: 0.815 test_accuracy: 0.798\n",
            "[4,   560] train_loss: 0.353 train_accuracy: 0.768 test_accuracy: 0.738\n",
            "[4,   565] train_loss: 1.176 train_accuracy: 0.821 test_accuracy: 0.789\n",
            "[4,   570] train_loss: 0.432 train_accuracy: 0.830 test_accuracy: 0.782\n",
            "[4,   575] train_loss: 0.399 train_accuracy: 0.837 test_accuracy: 0.790\n",
            "[4,   580] train_loss: 0.310 train_accuracy: 0.817 test_accuracy: 0.773\n",
            "[4,   585] train_loss: 0.581 train_accuracy: 0.825 test_accuracy: 0.792\n",
            "[4,   590] train_loss: 0.358 train_accuracy: 0.858 test_accuracy: 0.824\n",
            "[4,   595] train_loss: 0.975 train_accuracy: 0.869 test_accuracy: 0.831\n",
            "[4,   600] train_loss: 0.454 train_accuracy: 0.868 test_accuracy: 0.829\n",
            "[4,   605] train_loss: 0.753 train_accuracy: 0.851 test_accuracy: 0.809\n",
            "[4,   610] train_loss: 0.560 train_accuracy: 0.835 test_accuracy: 0.793\n",
            "[4,   615] train_loss: 0.538 train_accuracy: 0.851 test_accuracy: 0.812\n",
            "[4,   620] train_loss: 0.670 train_accuracy: 0.854 test_accuracy: 0.831\n",
            "[4,   625] train_loss: 0.210 train_accuracy: 0.844 test_accuracy: 0.820\n",
            "[4,   630] train_loss: 0.473 train_accuracy: 0.822 test_accuracy: 0.800\n",
            "[4,   635] train_loss: 0.679 train_accuracy: 0.802 test_accuracy: 0.777\n",
            "[4,   640] train_loss: 0.492 train_accuracy: 0.820 test_accuracy: 0.794\n",
            "[4,   645] train_loss: 0.461 train_accuracy: 0.860 test_accuracy: 0.829\n",
            "[4,   650] train_loss: 0.456 train_accuracy: 0.875 test_accuracy: 0.849\n",
            "[4,   655] train_loss: 0.612 train_accuracy: 0.874 test_accuracy: 0.853\n",
            "[4,   660] train_loss: 0.303 train_accuracy: 0.861 test_accuracy: 0.843\n",
            "[4,   665] train_loss: 0.239 train_accuracy: 0.843 test_accuracy: 0.823\n",
            "[4,   670] train_loss: 0.722 train_accuracy: 0.862 test_accuracy: 0.833\n",
            "[4,   675] train_loss: 0.487 train_accuracy: 0.851 test_accuracy: 0.806\n",
            "[4,   680] train_loss: 0.554 train_accuracy: 0.865 test_accuracy: 0.830\n",
            "[4,   685] train_loss: 0.273 train_accuracy: 0.865 test_accuracy: 0.829\n",
            "[4,   690] train_loss: 0.231 train_accuracy: 0.862 test_accuracy: 0.820\n",
            "[4,   695] train_loss: 0.288 train_accuracy: 0.873 test_accuracy: 0.833\n",
            "[4,   700] train_loss: 0.390 train_accuracy: 0.863 test_accuracy: 0.833\n",
            "[4,   705] train_loss: 0.644 train_accuracy: 0.853 test_accuracy: 0.829\n",
            "[4,   710] train_loss: 0.810 train_accuracy: 0.865 test_accuracy: 0.842\n",
            "[4,   715] train_loss: 0.768 train_accuracy: 0.869 test_accuracy: 0.838\n",
            "[4,   720] train_loss: 0.198 train_accuracy: 0.843 test_accuracy: 0.808\n",
            "[4,   725] train_loss: 0.329 train_accuracy: 0.831 test_accuracy: 0.805\n",
            "[4,   730] train_loss: 0.491 train_accuracy: 0.823 test_accuracy: 0.797\n",
            "[4,   735] train_loss: 0.854 train_accuracy: 0.827 test_accuracy: 0.805\n",
            "[4,   740] train_loss: 0.325 train_accuracy: 0.871 test_accuracy: 0.848\n",
            "[4,   745] train_loss: 0.304 train_accuracy: 0.867 test_accuracy: 0.843\n",
            "[4,   750] train_loss: 0.560 train_accuracy: 0.863 test_accuracy: 0.838\n",
            "[4,   755] train_loss: 0.528 train_accuracy: 0.865 test_accuracy: 0.842\n",
            "[4,   760] train_loss: 0.709 train_accuracy: 0.852 test_accuracy: 0.824\n",
            "[4,   765] train_loss: 0.746 train_accuracy: 0.848 test_accuracy: 0.822\n",
            "[4,   770] train_loss: 0.386 train_accuracy: 0.832 test_accuracy: 0.807\n",
            "[4,   775] train_loss: 0.253 train_accuracy: 0.836 test_accuracy: 0.807\n",
            "[4,   780] train_loss: 0.462 train_accuracy: 0.854 test_accuracy: 0.827\n",
            "[4,   785] train_loss: 0.336 train_accuracy: 0.865 test_accuracy: 0.825\n",
            "[4,   790] train_loss: 0.444 train_accuracy: 0.849 test_accuracy: 0.811\n",
            "[4,   795] train_loss: 0.629 train_accuracy: 0.856 test_accuracy: 0.824\n",
            "[4,   800] train_loss: 0.127 train_accuracy: 0.855 test_accuracy: 0.825\n",
            "[4,   805] train_loss: 0.484 train_accuracy: 0.855 test_accuracy: 0.827\n",
            "[4,   810] train_loss: 0.140 train_accuracy: 0.835 test_accuracy: 0.810\n",
            "[4,   815] train_loss: 0.243 train_accuracy: 0.821 test_accuracy: 0.795\n",
            "[4,   820] train_loss: 0.291 train_accuracy: 0.837 test_accuracy: 0.790\n",
            "[4,   825] train_loss: 0.897 train_accuracy: 0.869 test_accuracy: 0.823\n",
            "[4,   830] train_loss: 0.477 train_accuracy: 0.859 test_accuracy: 0.823\n",
            "[4,   835] train_loss: 0.393 train_accuracy: 0.849 test_accuracy: 0.835\n",
            "[4,   840] train_loss: 0.482 train_accuracy: 0.845 test_accuracy: 0.839\n",
            "[4,   845] train_loss: 0.372 train_accuracy: 0.848 test_accuracy: 0.835\n",
            "[4,   850] train_loss: 0.327 train_accuracy: 0.842 test_accuracy: 0.831\n",
            "[4,   855] train_loss: 1.126 train_accuracy: 0.875 test_accuracy: 0.846\n",
            "[4,   860] train_loss: 0.397 train_accuracy: 0.882 test_accuracy: 0.852\n",
            "[4,   865] train_loss: 0.331 train_accuracy: 0.865 test_accuracy: 0.842\n",
            "[4,   870] train_loss: 0.531 train_accuracy: 0.860 test_accuracy: 0.835\n",
            "[4,   875] train_loss: 0.405 train_accuracy: 0.836 test_accuracy: 0.818\n",
            "[4,   880] train_loss: 0.333 train_accuracy: 0.814 test_accuracy: 0.803\n",
            "[4,   885] train_loss: 0.432 train_accuracy: 0.810 test_accuracy: 0.796\n",
            "[4,   890] train_loss: 0.415 train_accuracy: 0.829 test_accuracy: 0.806\n",
            "[4,   895] train_loss: 0.573 train_accuracy: 0.838 test_accuracy: 0.815\n",
            "[4,   900] train_loss: 0.114 train_accuracy: 0.847 test_accuracy: 0.816\n",
            "[4,   905] train_loss: 0.471 train_accuracy: 0.853 test_accuracy: 0.812\n",
            "[4,   910] train_loss: 1.283 train_accuracy: 0.854 test_accuracy: 0.817\n",
            "[4,   915] train_loss: 0.578 train_accuracy: 0.854 test_accuracy: 0.821\n",
            "[4,   920] train_loss: 0.611 train_accuracy: 0.869 test_accuracy: 0.831\n",
            "[4,   925] train_loss: 0.533 train_accuracy: 0.867 test_accuracy: 0.830\n",
            "[4,   930] train_loss: 0.795 train_accuracy: 0.856 test_accuracy: 0.824\n",
            "[4,   935] train_loss: 0.462 train_accuracy: 0.869 test_accuracy: 0.841\n",
            "[4,   940] train_loss: 0.484 train_accuracy: 0.875 test_accuracy: 0.845\n",
            "[4,   945] train_loss: 0.278 train_accuracy: 0.857 test_accuracy: 0.824\n",
            "[4,   950] train_loss: 0.357 train_accuracy: 0.823 test_accuracy: 0.793\n",
            "[4,   955] train_loss: 0.261 train_accuracy: 0.835 test_accuracy: 0.796\n",
            "[4,   960] train_loss: 0.613 train_accuracy: 0.863 test_accuracy: 0.833\n",
            "[4,   965] train_loss: 0.607 train_accuracy: 0.872 test_accuracy: 0.851\n",
            "[4,   970] train_loss: 0.682 train_accuracy: 0.835 test_accuracy: 0.818\n",
            "[4,   975] train_loss: 0.909 train_accuracy: 0.832 test_accuracy: 0.801\n",
            "[4,   980] train_loss: 0.473 train_accuracy: 0.832 test_accuracy: 0.814\n",
            "[4,   985] train_loss: 0.790 train_accuracy: 0.830 test_accuracy: 0.831\n",
            "[4,   990] train_loss: 0.533 train_accuracy: 0.849 test_accuracy: 0.842\n",
            "[4,   995] train_loss: 0.445 train_accuracy: 0.833 test_accuracy: 0.831\n",
            "[4,  1000] train_loss: 0.541 train_accuracy: 0.829 test_accuracy: 0.839\n",
            "[4,  1005] train_loss: 0.311 train_accuracy: 0.812 test_accuracy: 0.828\n",
            "[4,  1010] train_loss: 0.668 train_accuracy: 0.802 test_accuracy: 0.823\n",
            "[4,  1015] train_loss: 1.005 train_accuracy: 0.835 test_accuracy: 0.849\n",
            "[4,  1020] train_loss: 0.620 train_accuracy: 0.836 test_accuracy: 0.836\n",
            "[4,  1025] train_loss: 0.236 train_accuracy: 0.786 test_accuracy: 0.767\n",
            "[4,  1030] train_loss: 0.571 train_accuracy: 0.765 test_accuracy: 0.744\n",
            "[4,  1035] train_loss: 0.760 train_accuracy: 0.827 test_accuracy: 0.788\n",
            "[4,  1040] train_loss: 0.516 train_accuracy: 0.787 test_accuracy: 0.752\n",
            "[4,  1045] train_loss: 0.666 train_accuracy: 0.740 test_accuracy: 0.709\n",
            "[4,  1050] train_loss: 0.413 train_accuracy: 0.795 test_accuracy: 0.761\n",
            "[4,  1055] train_loss: 0.271 train_accuracy: 0.757 test_accuracy: 0.725\n",
            "[4,  1060] train_loss: 0.436 train_accuracy: 0.808 test_accuracy: 0.783\n",
            "[4,  1065] train_loss: 0.520 train_accuracy: 0.840 test_accuracy: 0.822\n",
            "[4,  1070] train_loss: 1.289 train_accuracy: 0.815 test_accuracy: 0.803\n",
            "[4,  1075] train_loss: 0.807 train_accuracy: 0.848 test_accuracy: 0.818\n",
            "[4,  1080] train_loss: 0.576 train_accuracy: 0.842 test_accuracy: 0.823\n",
            "[4,  1085] train_loss: 0.531 train_accuracy: 0.792 test_accuracy: 0.790\n",
            "[4,  1090] train_loss: 0.256 train_accuracy: 0.821 test_accuracy: 0.802\n",
            "[4,  1095] train_loss: 0.392 train_accuracy: 0.837 test_accuracy: 0.808\n",
            "[4,  1100] train_loss: 0.263 train_accuracy: 0.819 test_accuracy: 0.792\n",
            "[4,  1105] train_loss: 0.069 train_accuracy: 0.796 test_accuracy: 0.775\n",
            "[4,  1110] train_loss: 0.496 train_accuracy: 0.814 test_accuracy: 0.795\n",
            "[4,  1115] train_loss: 0.439 train_accuracy: 0.849 test_accuracy: 0.822\n",
            "[4,  1120] train_loss: 0.443 train_accuracy: 0.815 test_accuracy: 0.805\n",
            "[4,  1125] train_loss: 0.374 train_accuracy: 0.822 test_accuracy: 0.831\n",
            "[4,  1130] train_loss: 0.541 train_accuracy: 0.836 test_accuracy: 0.840\n",
            "[4,  1135] train_loss: 0.709 train_accuracy: 0.800 test_accuracy: 0.787\n",
            "[4,  1140] train_loss: 0.280 train_accuracy: 0.787 test_accuracy: 0.771\n",
            "[4,  1145] train_loss: 0.451 train_accuracy: 0.795 test_accuracy: 0.779\n",
            "[4,  1150] train_loss: 0.909 train_accuracy: 0.840 test_accuracy: 0.817\n",
            "[4,  1155] train_loss: 0.962 train_accuracy: 0.857 test_accuracy: 0.837\n",
            "[4,  1160] train_loss: 0.275 train_accuracy: 0.789 test_accuracy: 0.773\n",
            "[4,  1165] train_loss: 0.360 train_accuracy: 0.807 test_accuracy: 0.791\n",
            "[4,  1170] train_loss: 0.553 train_accuracy: 0.850 test_accuracy: 0.828\n",
            "[4,  1175] train_loss: 0.694 train_accuracy: 0.846 test_accuracy: 0.825\n",
            "[4,  1180] train_loss: 0.657 train_accuracy: 0.831 test_accuracy: 0.817\n",
            "[4,  1185] train_loss: 0.620 train_accuracy: 0.815 test_accuracy: 0.791\n",
            "[4,  1190] train_loss: 0.692 train_accuracy: 0.813 test_accuracy: 0.780\n",
            "[4,  1195] train_loss: 1.034 train_accuracy: 0.831 test_accuracy: 0.819\n",
            "[4,  1200] train_loss: 0.490 train_accuracy: 0.829 test_accuracy: 0.823\n",
            "[4,  1205] train_loss: 0.272 train_accuracy: 0.854 test_accuracy: 0.835\n",
            "[4,  1210] train_loss: 0.147 train_accuracy: 0.859 test_accuracy: 0.849\n",
            "[4,  1215] train_loss: 0.892 train_accuracy: 0.859 test_accuracy: 0.846\n",
            "[4,  1220] train_loss: 0.298 train_accuracy: 0.856 test_accuracy: 0.835\n",
            "[4,  1225] train_loss: 0.547 train_accuracy: 0.863 test_accuracy: 0.826\n",
            "[4,  1230] train_loss: 0.409 train_accuracy: 0.855 test_accuracy: 0.817\n",
            "[4,  1235] train_loss: 0.349 train_accuracy: 0.859 test_accuracy: 0.821\n",
            "[4,  1240] train_loss: 1.126 train_accuracy: 0.858 test_accuracy: 0.822\n",
            "[4,  1245] train_loss: 0.456 train_accuracy: 0.848 test_accuracy: 0.817\n",
            "[4,  1250] train_loss: 0.541 train_accuracy: 0.818 test_accuracy: 0.789\n",
            "[4,  1255] train_loss: 0.624 train_accuracy: 0.832 test_accuracy: 0.818\n",
            "[4,  1260] train_loss: 0.562 train_accuracy: 0.808 test_accuracy: 0.797\n",
            "[4,  1265] train_loss: 0.729 train_accuracy: 0.850 test_accuracy: 0.831\n",
            "[4,  1270] train_loss: 0.372 train_accuracy: 0.862 test_accuracy: 0.854\n",
            "[4,  1275] train_loss: 0.540 train_accuracy: 0.860 test_accuracy: 0.860\n",
            "[4,  1280] train_loss: 0.320 train_accuracy: 0.852 test_accuracy: 0.862\n",
            "[4,  1285] train_loss: 0.932 train_accuracy: 0.841 test_accuracy: 0.856\n",
            "[4,  1290] train_loss: 0.727 train_accuracy: 0.836 test_accuracy: 0.849\n",
            "[4,  1295] train_loss: 0.451 train_accuracy: 0.836 test_accuracy: 0.834\n",
            "[4,  1300] train_loss: 0.775 train_accuracy: 0.850 test_accuracy: 0.830\n",
            "[4,  1305] train_loss: 0.754 train_accuracy: 0.835 test_accuracy: 0.802\n",
            "[4,  1310] train_loss: 0.521 train_accuracy: 0.806 test_accuracy: 0.777\n",
            "[4,  1315] train_loss: 0.413 train_accuracy: 0.833 test_accuracy: 0.805\n",
            "[4,  1320] train_loss: 0.191 train_accuracy: 0.863 test_accuracy: 0.843\n",
            "[4,  1325] train_loss: 0.959 train_accuracy: 0.848 test_accuracy: 0.814\n",
            "[4,  1330] train_loss: 0.533 train_accuracy: 0.848 test_accuracy: 0.820\n",
            "[4,  1335] train_loss: 0.456 train_accuracy: 0.850 test_accuracy: 0.831\n",
            "[4,  1340] train_loss: 0.455 train_accuracy: 0.836 test_accuracy: 0.819\n",
            "[4,  1345] train_loss: 0.251 train_accuracy: 0.841 test_accuracy: 0.829\n",
            "[4,  1350] train_loss: 0.348 train_accuracy: 0.834 test_accuracy: 0.825\n",
            "[4,  1355] train_loss: 0.432 train_accuracy: 0.834 test_accuracy: 0.812\n",
            "[4,  1360] train_loss: 0.287 train_accuracy: 0.874 test_accuracy: 0.849\n",
            "[4,  1365] train_loss: 0.258 train_accuracy: 0.869 test_accuracy: 0.834\n",
            "[4,  1370] train_loss: 0.357 train_accuracy: 0.864 test_accuracy: 0.825\n",
            "[4,  1375] train_loss: 0.789 train_accuracy: 0.860 test_accuracy: 0.831\n",
            "[4,  1380] train_loss: 0.550 train_accuracy: 0.844 test_accuracy: 0.812\n",
            "[4,  1385] train_loss: 0.770 train_accuracy: 0.861 test_accuracy: 0.832\n",
            "[4,  1390] train_loss: 0.765 train_accuracy: 0.856 test_accuracy: 0.832\n",
            "[4,  1395] train_loss: 0.804 train_accuracy: 0.840 test_accuracy: 0.810\n",
            "[4,  1400] train_loss: 0.889 train_accuracy: 0.839 test_accuracy: 0.799\n",
            "[4,  1405] train_loss: 0.518 train_accuracy: 0.842 test_accuracy: 0.800\n",
            "[4,  1410] train_loss: 0.282 train_accuracy: 0.825 test_accuracy: 0.808\n",
            "[4,  1415] train_loss: 0.996 train_accuracy: 0.844 test_accuracy: 0.826\n",
            "[4,  1420] train_loss: 0.730 train_accuracy: 0.883 test_accuracy: 0.859\n",
            "[4,  1425] train_loss: 0.497 train_accuracy: 0.876 test_accuracy: 0.848\n",
            "[4,  1430] train_loss: 0.647 train_accuracy: 0.853 test_accuracy: 0.820\n",
            "[4,  1435] train_loss: 0.641 train_accuracy: 0.860 test_accuracy: 0.846\n",
            "[4,  1440] train_loss: 0.721 train_accuracy: 0.855 test_accuracy: 0.840\n",
            "[4,  1445] train_loss: 0.604 train_accuracy: 0.837 test_accuracy: 0.821\n",
            "[4,  1450] train_loss: 0.777 train_accuracy: 0.850 test_accuracy: 0.822\n",
            "[4,  1455] train_loss: 0.637 train_accuracy: 0.848 test_accuracy: 0.819\n",
            "[4,  1460] train_loss: 0.588 train_accuracy: 0.856 test_accuracy: 0.827\n",
            "[4,  1465] train_loss: 0.393 train_accuracy: 0.850 test_accuracy: 0.824\n",
            "[4,  1470] train_loss: 0.448 train_accuracy: 0.832 test_accuracy: 0.812\n",
            "[4,  1475] train_loss: 0.409 train_accuracy: 0.853 test_accuracy: 0.839\n",
            "[4,  1480] train_loss: 0.271 train_accuracy: 0.884 test_accuracy: 0.865\n",
            "[4,  1485] train_loss: 0.370 train_accuracy: 0.895 test_accuracy: 0.867\n",
            "[4,  1490] train_loss: 0.560 train_accuracy: 0.884 test_accuracy: 0.857\n",
            "[4,  1495] train_loss: 0.428 train_accuracy: 0.875 test_accuracy: 0.853\n",
            "[4,  1500] train_loss: 0.233 train_accuracy: 0.869 test_accuracy: 0.851\n",
            "[4,  1505] train_loss: 1.341 train_accuracy: 0.885 test_accuracy: 0.852\n",
            "[4,  1510] train_loss: 0.228 train_accuracy: 0.862 test_accuracy: 0.831\n",
            "[4,  1515] train_loss: 0.383 train_accuracy: 0.842 test_accuracy: 0.816\n",
            "[4,  1520] train_loss: 0.494 train_accuracy: 0.846 test_accuracy: 0.822\n",
            "[4,  1525] train_loss: 0.456 train_accuracy: 0.836 test_accuracy: 0.808\n",
            "[4,  1530] train_loss: 0.377 train_accuracy: 0.836 test_accuracy: 0.814\n",
            "[4,  1535] train_loss: 0.532 train_accuracy: 0.851 test_accuracy: 0.828\n",
            "[4,  1540] train_loss: 0.561 train_accuracy: 0.857 test_accuracy: 0.836\n",
            "[4,  1545] train_loss: 0.413 train_accuracy: 0.852 test_accuracy: 0.833\n",
            "[4,  1550] train_loss: 0.284 train_accuracy: 0.868 test_accuracy: 0.829\n",
            "[4,  1555] train_loss: 0.607 train_accuracy: 0.857 test_accuracy: 0.817\n",
            "[4,  1560] train_loss: 0.541 train_accuracy: 0.857 test_accuracy: 0.824\n",
            "[4,  1565] train_loss: 0.800 train_accuracy: 0.831 test_accuracy: 0.797\n",
            "[4,  1570] train_loss: 0.628 train_accuracy: 0.840 test_accuracy: 0.808\n",
            "[4,  1575] train_loss: 0.426 train_accuracy: 0.852 test_accuracy: 0.820\n",
            "[4,  1580] train_loss: 0.609 train_accuracy: 0.875 test_accuracy: 0.852\n",
            "[4,  1585] train_loss: 0.227 train_accuracy: 0.850 test_accuracy: 0.833\n",
            "[4,  1590] train_loss: 0.609 train_accuracy: 0.851 test_accuracy: 0.835\n",
            "[4,  1595] train_loss: 0.752 train_accuracy: 0.869 test_accuracy: 0.853\n",
            "[4,  1600] train_loss: 1.008 train_accuracy: 0.877 test_accuracy: 0.859\n",
            "[4,  1605] train_loss: 0.378 train_accuracy: 0.873 test_accuracy: 0.853\n",
            "[4,  1610] train_loss: 0.284 train_accuracy: 0.863 test_accuracy: 0.843\n",
            "[4,  1615] train_loss: 0.483 train_accuracy: 0.869 test_accuracy: 0.837\n",
            "[4,  1620] train_loss: 0.710 train_accuracy: 0.874 test_accuracy: 0.842\n",
            "[4,  1625] train_loss: 0.590 train_accuracy: 0.852 test_accuracy: 0.820\n",
            "[4,  1630] train_loss: 0.692 train_accuracy: 0.845 test_accuracy: 0.814\n",
            "[4,  1635] train_loss: 0.215 train_accuracy: 0.842 test_accuracy: 0.813\n",
            "[4,  1640] train_loss: 0.231 train_accuracy: 0.853 test_accuracy: 0.824\n",
            "[4,  1645] train_loss: 0.442 train_accuracy: 0.877 test_accuracy: 0.846\n",
            "[4,  1650] train_loss: 0.574 train_accuracy: 0.886 test_accuracy: 0.857\n",
            "[4,  1655] train_loss: 0.589 train_accuracy: 0.887 test_accuracy: 0.860\n",
            "[4,  1660] train_loss: 0.479 train_accuracy: 0.875 test_accuracy: 0.844\n",
            "[4,  1665] train_loss: 0.295 train_accuracy: 0.871 test_accuracy: 0.843\n",
            "[4,  1670] train_loss: 0.418 train_accuracy: 0.882 test_accuracy: 0.849\n",
            "[4,  1675] train_loss: 0.523 train_accuracy: 0.872 test_accuracy: 0.838\n",
            "[4,  1680] train_loss: 0.191 train_accuracy: 0.843 test_accuracy: 0.808\n",
            "[4,  1685] train_loss: 1.109 train_accuracy: 0.855 test_accuracy: 0.823\n",
            "[4,  1690] train_loss: 0.311 train_accuracy: 0.879 test_accuracy: 0.858\n",
            "[4,  1695] train_loss: 0.306 train_accuracy: 0.876 test_accuracy: 0.863\n",
            "[4,  1700] train_loss: 0.632 train_accuracy: 0.869 test_accuracy: 0.859\n",
            "[4,  1705] train_loss: 1.050 train_accuracy: 0.860 test_accuracy: 0.847\n",
            "[4,  1710] train_loss: 0.465 train_accuracy: 0.860 test_accuracy: 0.835\n",
            "[4,  1715] train_loss: 0.403 train_accuracy: 0.845 test_accuracy: 0.814\n",
            "[4,  1720] train_loss: 0.562 train_accuracy: 0.836 test_accuracy: 0.803\n",
            "[4,  1725] train_loss: 0.778 train_accuracy: 0.825 test_accuracy: 0.784\n",
            "[5,     5] train_loss: 0.555 train_accuracy: 0.834 test_accuracy: 0.791\n",
            "[5,    10] train_loss: 0.911 train_accuracy: 0.859 test_accuracy: 0.820\n",
            "[5,    15] train_loss: 0.368 train_accuracy: 0.845 test_accuracy: 0.814\n",
            "[5,    20] train_loss: 0.467 train_accuracy: 0.863 test_accuracy: 0.833\n",
            "[5,    25] train_loss: 0.430 train_accuracy: 0.869 test_accuracy: 0.841\n",
            "[5,    30] train_loss: 0.207 train_accuracy: 0.863 test_accuracy: 0.848\n",
            "[5,    35] train_loss: 0.678 train_accuracy: 0.870 test_accuracy: 0.852\n",
            "[5,    40] train_loss: 0.216 train_accuracy: 0.889 test_accuracy: 0.858\n",
            "[5,    45] train_loss: 0.271 train_accuracy: 0.894 test_accuracy: 0.857\n",
            "[5,    50] train_loss: 0.482 train_accuracy: 0.888 test_accuracy: 0.858\n",
            "[5,    55] train_loss: 0.513 train_accuracy: 0.882 test_accuracy: 0.843\n",
            "[5,    60] train_loss: 0.220 train_accuracy: 0.877 test_accuracy: 0.845\n",
            "[5,    65] train_loss: 0.499 train_accuracy: 0.880 test_accuracy: 0.855\n",
            "[5,    70] train_loss: 0.622 train_accuracy: 0.888 test_accuracy: 0.862\n",
            "[5,    75] train_loss: 0.258 train_accuracy: 0.889 test_accuracy: 0.865\n",
            "[5,    80] train_loss: 0.479 train_accuracy: 0.877 test_accuracy: 0.854\n",
            "[5,    85] train_loss: 0.338 train_accuracy: 0.879 test_accuracy: 0.861\n",
            "[5,    90] train_loss: 0.200 train_accuracy: 0.884 test_accuracy: 0.864\n",
            "[5,    95] train_loss: 0.382 train_accuracy: 0.880 test_accuracy: 0.855\n",
            "[5,   100] train_loss: 0.353 train_accuracy: 0.878 test_accuracy: 0.857\n",
            "[5,   105] train_loss: 0.201 train_accuracy: 0.878 test_accuracy: 0.854\n",
            "[5,   110] train_loss: 0.260 train_accuracy: 0.885 test_accuracy: 0.858\n",
            "[5,   115] train_loss: 0.519 train_accuracy: 0.903 test_accuracy: 0.873\n",
            "[5,   120] train_loss: 0.945 train_accuracy: 0.904 test_accuracy: 0.869\n",
            "[5,   125] train_loss: 0.200 train_accuracy: 0.867 test_accuracy: 0.836\n",
            "[5,   130] train_loss: 0.638 train_accuracy: 0.839 test_accuracy: 0.807\n",
            "[5,   135] train_loss: 0.743 train_accuracy: 0.874 test_accuracy: 0.846\n",
            "[5,   140] train_loss: 0.136 train_accuracy: 0.863 test_accuracy: 0.835\n",
            "[5,   145] train_loss: 0.627 train_accuracy: 0.850 test_accuracy: 0.823\n",
            "[5,   150] train_loss: 0.585 train_accuracy: 0.852 test_accuracy: 0.816\n",
            "[5,   155] train_loss: 0.651 train_accuracy: 0.868 test_accuracy: 0.834\n",
            "[5,   160] train_loss: 0.336 train_accuracy: 0.877 test_accuracy: 0.842\n",
            "[5,   165] train_loss: 0.480 train_accuracy: 0.887 test_accuracy: 0.857\n",
            "[5,   170] train_loss: 0.104 train_accuracy: 0.876 test_accuracy: 0.850\n",
            "[5,   175] train_loss: 0.400 train_accuracy: 0.864 test_accuracy: 0.841\n",
            "[5,   180] train_loss: 0.566 train_accuracy: 0.880 test_accuracy: 0.858\n",
            "[5,   185] train_loss: 0.652 train_accuracy: 0.872 test_accuracy: 0.847\n",
            "[5,   190] train_loss: 0.466 train_accuracy: 0.866 test_accuracy: 0.834\n",
            "[5,   195] train_loss: 0.319 train_accuracy: 0.867 test_accuracy: 0.834\n",
            "[5,   200] train_loss: 0.474 train_accuracy: 0.882 test_accuracy: 0.852\n",
            "[5,   205] train_loss: 0.484 train_accuracy: 0.887 test_accuracy: 0.866\n",
            "[5,   210] train_loss: 0.588 train_accuracy: 0.874 test_accuracy: 0.861\n",
            "[5,   215] train_loss: 0.660 train_accuracy: 0.858 test_accuracy: 0.854\n",
            "[5,   220] train_loss: 0.288 train_accuracy: 0.851 test_accuracy: 0.838\n",
            "[5,   225] train_loss: 0.484 train_accuracy: 0.860 test_accuracy: 0.833\n",
            "[5,   230] train_loss: 0.292 train_accuracy: 0.847 test_accuracy: 0.818\n",
            "[5,   235] train_loss: 0.314 train_accuracy: 0.841 test_accuracy: 0.814\n",
            "[5,   240] train_loss: 0.433 train_accuracy: 0.844 test_accuracy: 0.813\n",
            "[5,   245] train_loss: 0.414 train_accuracy: 0.844 test_accuracy: 0.811\n",
            "[5,   250] train_loss: 0.314 train_accuracy: 0.882 test_accuracy: 0.850\n",
            "[5,   255] train_loss: 0.626 train_accuracy: 0.886 test_accuracy: 0.860\n",
            "[5,   260] train_loss: 0.255 train_accuracy: 0.868 test_accuracy: 0.852\n",
            "[5,   265] train_loss: 0.752 train_accuracy: 0.869 test_accuracy: 0.861\n",
            "[5,   270] train_loss: 0.442 train_accuracy: 0.863 test_accuracy: 0.859\n",
            "[5,   275] train_loss: 0.327 train_accuracy: 0.858 test_accuracy: 0.846\n",
            "[5,   280] train_loss: 0.472 train_accuracy: 0.833 test_accuracy: 0.810\n",
            "[5,   285] train_loss: 0.426 train_accuracy: 0.826 test_accuracy: 0.800\n",
            "[5,   290] train_loss: 0.529 train_accuracy: 0.846 test_accuracy: 0.820\n",
            "[5,   295] train_loss: 0.410 train_accuracy: 0.847 test_accuracy: 0.817\n",
            "[5,   300] train_loss: 0.439 train_accuracy: 0.854 test_accuracy: 0.834\n",
            "[5,   305] train_loss: 0.234 train_accuracy: 0.855 test_accuracy: 0.837\n",
            "[5,   310] train_loss: 0.359 train_accuracy: 0.854 test_accuracy: 0.829\n",
            "[5,   315] train_loss: 0.344 train_accuracy: 0.850 test_accuracy: 0.824\n",
            "[5,   320] train_loss: 0.781 train_accuracy: 0.863 test_accuracy: 0.839\n",
            "[5,   325] train_loss: 0.278 train_accuracy: 0.846 test_accuracy: 0.821\n",
            "[5,   330] train_loss: 0.357 train_accuracy: 0.816 test_accuracy: 0.776\n",
            "[5,   335] train_loss: 0.243 train_accuracy: 0.813 test_accuracy: 0.775\n",
            "[5,   340] train_loss: 1.141 train_accuracy: 0.867 test_accuracy: 0.830\n",
            "[5,   345] train_loss: 0.495 train_accuracy: 0.838 test_accuracy: 0.816\n",
            "[5,   350] train_loss: 0.441 train_accuracy: 0.791 test_accuracy: 0.801\n",
            "[5,   355] train_loss: 0.520 train_accuracy: 0.830 test_accuracy: 0.825\n",
            "[5,   360] train_loss: 0.292 train_accuracy: 0.843 test_accuracy: 0.833\n",
            "[5,   365] train_loss: 0.296 train_accuracy: 0.843 test_accuracy: 0.837\n",
            "[5,   370] train_loss: 0.614 train_accuracy: 0.854 test_accuracy: 0.844\n",
            "[5,   375] train_loss: 0.558 train_accuracy: 0.881 test_accuracy: 0.863\n",
            "[5,   380] train_loss: 0.326 train_accuracy: 0.890 test_accuracy: 0.870\n",
            "[5,   385] train_loss: 0.686 train_accuracy: 0.879 test_accuracy: 0.858\n",
            "[5,   390] train_loss: 0.428 train_accuracy: 0.852 test_accuracy: 0.840\n",
            "[5,   395] train_loss: 0.421 train_accuracy: 0.828 test_accuracy: 0.817\n",
            "[5,   400] train_loss: 0.630 train_accuracy: 0.858 test_accuracy: 0.835\n",
            "[5,   405] train_loss: 1.289 train_accuracy: 0.876 test_accuracy: 0.842\n",
            "[5,   410] train_loss: 0.160 train_accuracy: 0.859 test_accuracy: 0.834\n",
            "[5,   415] train_loss: 0.469 train_accuracy: 0.854 test_accuracy: 0.829\n",
            "[5,   420] train_loss: 0.475 train_accuracy: 0.851 test_accuracy: 0.821\n",
            "[5,   425] train_loss: 1.042 train_accuracy: 0.837 test_accuracy: 0.806\n",
            "[5,   430] train_loss: 0.513 train_accuracy: 0.849 test_accuracy: 0.816\n",
            "[5,   435] train_loss: 0.441 train_accuracy: 0.863 test_accuracy: 0.842\n",
            "[5,   440] train_loss: 0.210 train_accuracy: 0.864 test_accuracy: 0.844\n",
            "[5,   445] train_loss: 0.754 train_accuracy: 0.883 test_accuracy: 0.862\n",
            "[5,   450] train_loss: 0.333 train_accuracy: 0.876 test_accuracy: 0.851\n",
            "[5,   455] train_loss: 0.131 train_accuracy: 0.875 test_accuracy: 0.844\n",
            "[5,   460] train_loss: 0.551 train_accuracy: 0.859 test_accuracy: 0.841\n",
            "[5,   465] train_loss: 0.480 train_accuracy: 0.832 test_accuracy: 0.824\n",
            "[5,   470] train_loss: 1.157 train_accuracy: 0.850 test_accuracy: 0.845\n",
            "[5,   475] train_loss: 0.552 train_accuracy: 0.860 test_accuracy: 0.846\n",
            "[5,   480] train_loss: 0.227 train_accuracy: 0.840 test_accuracy: 0.819\n",
            "[5,   485] train_loss: 0.534 train_accuracy: 0.832 test_accuracy: 0.800\n",
            "[5,   490] train_loss: 0.543 train_accuracy: 0.816 test_accuracy: 0.792\n",
            "[5,   495] train_loss: 0.532 train_accuracy: 0.816 test_accuracy: 0.801\n",
            "[5,   500] train_loss: 0.300 train_accuracy: 0.832 test_accuracy: 0.816\n",
            "[5,   505] train_loss: 0.412 train_accuracy: 0.849 test_accuracy: 0.808\n",
            "[5,   510] train_loss: 0.916 train_accuracy: 0.856 test_accuracy: 0.832\n",
            "[5,   515] train_loss: 0.395 train_accuracy: 0.863 test_accuracy: 0.844\n",
            "[5,   520] train_loss: 0.383 train_accuracy: 0.842 test_accuracy: 0.818\n",
            "[5,   525] train_loss: 0.529 train_accuracy: 0.843 test_accuracy: 0.816\n",
            "[5,   530] train_loss: 0.419 train_accuracy: 0.843 test_accuracy: 0.814\n",
            "[5,   535] train_loss: 0.295 train_accuracy: 0.839 test_accuracy: 0.804\n",
            "[5,   540] train_loss: 0.553 train_accuracy: 0.854 test_accuracy: 0.817\n",
            "[5,   545] train_loss: 0.641 train_accuracy: 0.871 test_accuracy: 0.839\n",
            "[5,   550] train_loss: 0.484 train_accuracy: 0.857 test_accuracy: 0.826\n",
            "[5,   555] train_loss: 0.210 train_accuracy: 0.850 test_accuracy: 0.819\n",
            "[5,   560] train_loss: 0.660 train_accuracy: 0.844 test_accuracy: 0.823\n",
            "[5,   565] train_loss: 0.337 train_accuracy: 0.829 test_accuracy: 0.818\n",
            "[5,   570] train_loss: 0.434 train_accuracy: 0.831 test_accuracy: 0.825\n",
            "[5,   575] train_loss: 0.536 train_accuracy: 0.849 test_accuracy: 0.837\n",
            "[5,   580] train_loss: 0.405 train_accuracy: 0.851 test_accuracy: 0.830\n",
            "[5,   585] train_loss: 0.308 train_accuracy: 0.855 test_accuracy: 0.840\n",
            "[5,   590] train_loss: 0.512 train_accuracy: 0.873 test_accuracy: 0.865\n",
            "[5,   595] train_loss: 0.464 train_accuracy: 0.852 test_accuracy: 0.846\n",
            "[5,   600] train_loss: 0.441 train_accuracy: 0.824 test_accuracy: 0.804\n",
            "[5,   605] train_loss: 0.738 train_accuracy: 0.854 test_accuracy: 0.821\n",
            "[5,   610] train_loss: 0.817 train_accuracy: 0.876 test_accuracy: 0.843\n",
            "[5,   615] train_loss: 0.226 train_accuracy: 0.856 test_accuracy: 0.828\n",
            "[5,   620] train_loss: 0.915 train_accuracy: 0.862 test_accuracy: 0.834\n",
            "[5,   625] train_loss: 0.551 train_accuracy: 0.857 test_accuracy: 0.827\n",
            "[5,   630] train_loss: 0.508 train_accuracy: 0.872 test_accuracy: 0.850\n",
            "[5,   635] train_loss: 0.243 train_accuracy: 0.881 test_accuracy: 0.870\n",
            "[5,   640] train_loss: 0.850 train_accuracy: 0.882 test_accuracy: 0.870\n",
            "[5,   645] train_loss: 0.635 train_accuracy: 0.850 test_accuracy: 0.849\n",
            "[5,   650] train_loss: 0.268 train_accuracy: 0.858 test_accuracy: 0.844\n",
            "[5,   655] train_loss: 0.182 train_accuracy: 0.860 test_accuracy: 0.834\n",
            "[5,   660] train_loss: 0.348 train_accuracy: 0.856 test_accuracy: 0.816\n",
            "[5,   665] train_loss: 0.167 train_accuracy: 0.824 test_accuracy: 0.780\n",
            "[5,   670] train_loss: 0.707 train_accuracy: 0.807 test_accuracy: 0.791\n",
            "[5,   675] train_loss: 0.484 train_accuracy: 0.821 test_accuracy: 0.830\n",
            "[5,   680] train_loss: 0.325 train_accuracy: 0.829 test_accuracy: 0.842\n",
            "[5,   685] train_loss: 1.251 train_accuracy: 0.844 test_accuracy: 0.851\n",
            "[5,   690] train_loss: 0.771 train_accuracy: 0.867 test_accuracy: 0.838\n",
            "[5,   695] train_loss: 0.282 train_accuracy: 0.849 test_accuracy: 0.822\n",
            "[5,   700] train_loss: 0.329 train_accuracy: 0.828 test_accuracy: 0.798\n",
            "[5,   705] train_loss: 0.301 train_accuracy: 0.821 test_accuracy: 0.778\n",
            "[5,   710] train_loss: 0.446 train_accuracy: 0.819 test_accuracy: 0.782\n",
            "[5,   715] train_loss: 0.195 train_accuracy: 0.823 test_accuracy: 0.797\n",
            "[5,   720] train_loss: 0.376 train_accuracy: 0.842 test_accuracy: 0.815\n",
            "[5,   725] train_loss: 0.424 train_accuracy: 0.842 test_accuracy: 0.834\n",
            "[5,   730] train_loss: 0.235 train_accuracy: 0.839 test_accuracy: 0.819\n",
            "[5,   735] train_loss: 0.565 train_accuracy: 0.851 test_accuracy: 0.819\n",
            "[5,   740] train_loss: 0.485 train_accuracy: 0.840 test_accuracy: 0.811\n",
            "[5,   745] train_loss: 0.479 train_accuracy: 0.845 test_accuracy: 0.815\n",
            "[5,   750] train_loss: 0.863 train_accuracy: 0.841 test_accuracy: 0.827\n",
            "[5,   755] train_loss: 0.281 train_accuracy: 0.836 test_accuracy: 0.826\n",
            "[5,   760] train_loss: 0.317 train_accuracy: 0.817 test_accuracy: 0.797\n",
            "[5,   765] train_loss: 0.164 train_accuracy: 0.821 test_accuracy: 0.778\n",
            "[5,   770] train_loss: 0.224 train_accuracy: 0.841 test_accuracy: 0.812\n",
            "[5,   775] train_loss: 1.295 train_accuracy: 0.852 test_accuracy: 0.827\n",
            "[5,   780] train_loss: 0.657 train_accuracy: 0.805 test_accuracy: 0.806\n",
            "[5,   785] train_loss: 0.660 train_accuracy: 0.815 test_accuracy: 0.810\n",
            "[5,   790] train_loss: 0.738 train_accuracy: 0.828 test_accuracy: 0.824\n",
            "[5,   795] train_loss: 0.304 train_accuracy: 0.833 test_accuracy: 0.837\n",
            "[5,   800] train_loss: 0.982 train_accuracy: 0.862 test_accuracy: 0.861\n",
            "[5,   805] train_loss: 0.396 train_accuracy: 0.880 test_accuracy: 0.857\n",
            "[5,   810] train_loss: 0.730 train_accuracy: 0.850 test_accuracy: 0.820\n",
            "[5,   815] train_loss: 0.453 train_accuracy: 0.836 test_accuracy: 0.801\n",
            "[5,   820] train_loss: 0.645 train_accuracy: 0.791 test_accuracy: 0.774\n",
            "[5,   825] train_loss: 0.827 train_accuracy: 0.820 test_accuracy: 0.805\n",
            "[5,   830] train_loss: 0.747 train_accuracy: 0.830 test_accuracy: 0.829\n",
            "[5,   835] train_loss: 0.510 train_accuracy: 0.806 test_accuracy: 0.812\n",
            "[5,   840] train_loss: 0.344 train_accuracy: 0.815 test_accuracy: 0.814\n",
            "[5,   845] train_loss: 0.591 train_accuracy: 0.875 test_accuracy: 0.857\n",
            "[5,   850] train_loss: 0.199 train_accuracy: 0.893 test_accuracy: 0.864\n",
            "[5,   855] train_loss: 0.339 train_accuracy: 0.878 test_accuracy: 0.846\n",
            "[5,   860] train_loss: 0.616 train_accuracy: 0.871 test_accuracy: 0.842\n",
            "[5,   865] train_loss: 0.444 train_accuracy: 0.863 test_accuracy: 0.832\n",
            "[5,   870] train_loss: 0.625 train_accuracy: 0.876 test_accuracy: 0.840\n",
            "[5,   875] train_loss: 0.445 train_accuracy: 0.869 test_accuracy: 0.838\n",
            "[5,   880] train_loss: 0.470 train_accuracy: 0.869 test_accuracy: 0.843\n",
            "[5,   885] train_loss: 1.017 train_accuracy: 0.893 test_accuracy: 0.864\n",
            "[5,   890] train_loss: 0.490 train_accuracy: 0.887 test_accuracy: 0.857\n",
            "[5,   895] train_loss: 0.246 train_accuracy: 0.861 test_accuracy: 0.833\n",
            "[5,   900] train_loss: 0.250 train_accuracy: 0.860 test_accuracy: 0.824\n",
            "[5,   905] train_loss: 0.467 train_accuracy: 0.872 test_accuracy: 0.834\n",
            "[5,   910] train_loss: 0.363 train_accuracy: 0.872 test_accuracy: 0.842\n",
            "[5,   915] train_loss: 0.280 train_accuracy: 0.861 test_accuracy: 0.833\n",
            "[5,   920] train_loss: 0.240 train_accuracy: 0.854 test_accuracy: 0.824\n",
            "[5,   925] train_loss: 0.730 train_accuracy: 0.871 test_accuracy: 0.838\n",
            "[5,   930] train_loss: 0.466 train_accuracy: 0.885 test_accuracy: 0.852\n",
            "[5,   935] train_loss: 0.656 train_accuracy: 0.885 test_accuracy: 0.865\n",
            "[5,   940] train_loss: 0.256 train_accuracy: 0.878 test_accuracy: 0.857\n",
            "[5,   945] train_loss: 0.779 train_accuracy: 0.871 test_accuracy: 0.861\n",
            "[5,   950] train_loss: 0.548 train_accuracy: 0.846 test_accuracy: 0.847\n",
            "[5,   955] train_loss: 0.338 train_accuracy: 0.858 test_accuracy: 0.851\n",
            "[5,   960] train_loss: 0.235 train_accuracy: 0.861 test_accuracy: 0.845\n",
            "[5,   965] train_loss: 0.488 train_accuracy: 0.872 test_accuracy: 0.851\n",
            "[5,   970] train_loss: 0.292 train_accuracy: 0.881 test_accuracy: 0.860\n",
            "[5,   975] train_loss: 0.280 train_accuracy: 0.881 test_accuracy: 0.865\n",
            "[5,   980] train_loss: 0.779 train_accuracy: 0.869 test_accuracy: 0.858\n",
            "[5,   985] train_loss: 0.297 train_accuracy: 0.866 test_accuracy: 0.848\n",
            "[5,   990] train_loss: 0.601 train_accuracy: 0.868 test_accuracy: 0.837\n",
            "[5,   995] train_loss: 0.485 train_accuracy: 0.859 test_accuracy: 0.823\n",
            "[5,  1000] train_loss: 0.452 train_accuracy: 0.863 test_accuracy: 0.824\n",
            "[5,  1005] train_loss: 0.730 train_accuracy: 0.860 test_accuracy: 0.814\n",
            "[5,  1010] train_loss: 0.299 train_accuracy: 0.862 test_accuracy: 0.815\n",
            "[5,  1015] train_loss: 0.756 train_accuracy: 0.878 test_accuracy: 0.836\n",
            "[5,  1020] train_loss: 0.320 train_accuracy: 0.880 test_accuracy: 0.830\n",
            "[5,  1025] train_loss: 0.966 train_accuracy: 0.882 test_accuracy: 0.839\n",
            "[5,  1030] train_loss: 0.187 train_accuracy: 0.842 test_accuracy: 0.814\n",
            "[5,  1035] train_loss: 0.342 train_accuracy: 0.836 test_accuracy: 0.813\n",
            "[5,  1040] train_loss: 0.429 train_accuracy: 0.841 test_accuracy: 0.814\n",
            "[5,  1045] train_loss: 0.661 train_accuracy: 0.882 test_accuracy: 0.855\n",
            "[5,  1050] train_loss: 0.307 train_accuracy: 0.886 test_accuracy: 0.855\n",
            "[5,  1055] train_loss: 0.457 train_accuracy: 0.882 test_accuracy: 0.847\n",
            "[5,  1060] train_loss: 0.378 train_accuracy: 0.877 test_accuracy: 0.838\n",
            "[5,  1065] train_loss: 0.556 train_accuracy: 0.866 test_accuracy: 0.821\n",
            "[5,  1070] train_loss: 0.484 train_accuracy: 0.846 test_accuracy: 0.800\n",
            "[5,  1075] train_loss: 0.354 train_accuracy: 0.835 test_accuracy: 0.785\n",
            "[5,  1080] train_loss: 0.676 train_accuracy: 0.853 test_accuracy: 0.807\n",
            "[5,  1085] train_loss: 0.265 train_accuracy: 0.857 test_accuracy: 0.822\n",
            "[5,  1090] train_loss: 0.414 train_accuracy: 0.860 test_accuracy: 0.840\n",
            "[5,  1095] train_loss: 0.192 train_accuracy: 0.824 test_accuracy: 0.825\n",
            "[5,  1100] train_loss: 0.322 train_accuracy: 0.835 test_accuracy: 0.830\n",
            "[5,  1105] train_loss: 0.319 train_accuracy: 0.856 test_accuracy: 0.845\n",
            "[5,  1110] train_loss: 0.489 train_accuracy: 0.843 test_accuracy: 0.833\n",
            "[5,  1115] train_loss: 0.719 train_accuracy: 0.849 test_accuracy: 0.834\n",
            "[5,  1120] train_loss: 0.721 train_accuracy: 0.871 test_accuracy: 0.847\n",
            "[5,  1125] train_loss: 0.281 train_accuracy: 0.871 test_accuracy: 0.844\n",
            "[5,  1130] train_loss: 0.289 train_accuracy: 0.861 test_accuracy: 0.840\n",
            "[5,  1135] train_loss: 0.227 train_accuracy: 0.866 test_accuracy: 0.834\n",
            "[5,  1140] train_loss: 0.643 train_accuracy: 0.865 test_accuracy: 0.828\n",
            "[5,  1145] train_loss: 0.822 train_accuracy: 0.834 test_accuracy: 0.798\n",
            "[5,  1150] train_loss: 0.973 train_accuracy: 0.841 test_accuracy: 0.814\n",
            "[5,  1155] train_loss: 0.799 train_accuracy: 0.846 test_accuracy: 0.825\n",
            "[5,  1160] train_loss: 0.420 train_accuracy: 0.868 test_accuracy: 0.847\n",
            "[5,  1165] train_loss: 0.703 train_accuracy: 0.871 test_accuracy: 0.851\n",
            "[5,  1170] train_loss: 0.488 train_accuracy: 0.860 test_accuracy: 0.840\n",
            "[5,  1175] train_loss: 0.433 train_accuracy: 0.843 test_accuracy: 0.818\n",
            "[5,  1180] train_loss: 0.371 train_accuracy: 0.838 test_accuracy: 0.806\n",
            "[5,  1185] train_loss: 0.480 train_accuracy: 0.857 test_accuracy: 0.827\n",
            "[5,  1190] train_loss: 0.501 train_accuracy: 0.861 test_accuracy: 0.836\n",
            "[5,  1195] train_loss: 0.344 train_accuracy: 0.857 test_accuracy: 0.827\n",
            "[5,  1200] train_loss: 0.914 train_accuracy: 0.854 test_accuracy: 0.817\n",
            "[5,  1205] train_loss: 0.476 train_accuracy: 0.844 test_accuracy: 0.805\n",
            "[5,  1210] train_loss: 0.304 train_accuracy: 0.841 test_accuracy: 0.813\n",
            "[5,  1215] train_loss: 0.640 train_accuracy: 0.858 test_accuracy: 0.837\n",
            "[5,  1220] train_loss: 0.567 train_accuracy: 0.867 test_accuracy: 0.846\n",
            "[5,  1225] train_loss: 0.565 train_accuracy: 0.873 test_accuracy: 0.850\n",
            "[5,  1230] train_loss: 0.384 train_accuracy: 0.875 test_accuracy: 0.847\n",
            "[5,  1235] train_loss: 0.381 train_accuracy: 0.876 test_accuracy: 0.849\n",
            "[5,  1240] train_loss: 0.161 train_accuracy: 0.874 test_accuracy: 0.845\n",
            "[5,  1245] train_loss: 0.434 train_accuracy: 0.872 test_accuracy: 0.843\n",
            "[5,  1250] train_loss: 0.281 train_accuracy: 0.876 test_accuracy: 0.848\n",
            "[5,  1255] train_loss: 0.248 train_accuracy: 0.866 test_accuracy: 0.841\n",
            "[5,  1260] train_loss: 0.728 train_accuracy: 0.867 test_accuracy: 0.839\n",
            "[5,  1265] train_loss: 0.617 train_accuracy: 0.871 test_accuracy: 0.836\n",
            "[5,  1270] train_loss: 0.459 train_accuracy: 0.839 test_accuracy: 0.801\n",
            "[5,  1275] train_loss: 0.985 train_accuracy: 0.842 test_accuracy: 0.811\n",
            "[5,  1280] train_loss: 0.484 train_accuracy: 0.846 test_accuracy: 0.829\n",
            "[5,  1285] train_loss: 0.385 train_accuracy: 0.854 test_accuracy: 0.835\n",
            "[5,  1290] train_loss: 0.267 train_accuracy: 0.852 test_accuracy: 0.837\n",
            "[5,  1295] train_loss: 0.321 train_accuracy: 0.854 test_accuracy: 0.829\n",
            "[5,  1300] train_loss: 0.430 train_accuracy: 0.852 test_accuracy: 0.821\n",
            "[5,  1305] train_loss: 0.757 train_accuracy: 0.861 test_accuracy: 0.823\n",
            "[5,  1310] train_loss: 0.917 train_accuracy: 0.846 test_accuracy: 0.821\n",
            "[5,  1315] train_loss: 0.597 train_accuracy: 0.832 test_accuracy: 0.820\n",
            "[5,  1320] train_loss: 0.545 train_accuracy: 0.828 test_accuracy: 0.812\n",
            "[5,  1325] train_loss: 0.595 train_accuracy: 0.822 test_accuracy: 0.806\n",
            "[5,  1330] train_loss: 0.561 train_accuracy: 0.850 test_accuracy: 0.816\n",
            "[5,  1335] train_loss: 0.445 train_accuracy: 0.866 test_accuracy: 0.822\n",
            "[5,  1340] train_loss: 0.208 train_accuracy: 0.871 test_accuracy: 0.832\n",
            "[5,  1345] train_loss: 0.218 train_accuracy: 0.886 test_accuracy: 0.850\n",
            "[5,  1350] train_loss: 0.869 train_accuracy: 0.890 test_accuracy: 0.856\n",
            "[5,  1355] train_loss: 0.243 train_accuracy: 0.888 test_accuracy: 0.852\n",
            "[5,  1360] train_loss: 0.434 train_accuracy: 0.893 test_accuracy: 0.862\n",
            "[5,  1365] train_loss: 0.235 train_accuracy: 0.898 test_accuracy: 0.865\n",
            "[5,  1370] train_loss: 0.344 train_accuracy: 0.885 test_accuracy: 0.866\n",
            "[5,  1375] train_loss: 0.252 train_accuracy: 0.865 test_accuracy: 0.862\n",
            "[5,  1380] train_loss: 0.323 train_accuracy: 0.869 test_accuracy: 0.862\n",
            "[5,  1385] train_loss: 0.327 train_accuracy: 0.880 test_accuracy: 0.868\n",
            "[5,  1390] train_loss: 0.161 train_accuracy: 0.880 test_accuracy: 0.863\n",
            "[5,  1395] train_loss: 0.311 train_accuracy: 0.879 test_accuracy: 0.865\n",
            "[5,  1400] train_loss: 0.346 train_accuracy: 0.866 test_accuracy: 0.853\n",
            "[5,  1405] train_loss: 0.654 train_accuracy: 0.855 test_accuracy: 0.846\n",
            "[5,  1410] train_loss: 0.774 train_accuracy: 0.861 test_accuracy: 0.847\n",
            "[5,  1415] train_loss: 0.896 train_accuracy: 0.852 test_accuracy: 0.816\n",
            "[5,  1420] train_loss: 0.592 train_accuracy: 0.801 test_accuracy: 0.763\n",
            "[5,  1425] train_loss: 0.469 train_accuracy: 0.817 test_accuracy: 0.774\n",
            "[5,  1430] train_loss: 0.892 train_accuracy: 0.818 test_accuracy: 0.778\n",
            "[5,  1435] train_loss: 0.347 train_accuracy: 0.804 test_accuracy: 0.777\n",
            "[5,  1440] train_loss: 0.440 train_accuracy: 0.821 test_accuracy: 0.806\n",
            "[5,  1445] train_loss: 0.355 train_accuracy: 0.830 test_accuracy: 0.822\n",
            "[5,  1450] train_loss: 0.465 train_accuracy: 0.831 test_accuracy: 0.834\n",
            "[5,  1455] train_loss: 0.517 train_accuracy: 0.812 test_accuracy: 0.819\n",
            "[5,  1460] train_loss: 0.610 train_accuracy: 0.790 test_accuracy: 0.784\n",
            "[5,  1465] train_loss: 0.585 train_accuracy: 0.830 test_accuracy: 0.819\n",
            "[5,  1470] train_loss: 0.598 train_accuracy: 0.866 test_accuracy: 0.837\n",
            "[5,  1475] train_loss: 0.438 train_accuracy: 0.868 test_accuracy: 0.836\n",
            "[5,  1480] train_loss: 0.351 train_accuracy: 0.872 test_accuracy: 0.835\n",
            "[5,  1485] train_loss: 0.112 train_accuracy: 0.859 test_accuracy: 0.825\n",
            "[5,  1490] train_loss: 0.473 train_accuracy: 0.854 test_accuracy: 0.818\n",
            "[5,  1495] train_loss: 0.478 train_accuracy: 0.869 test_accuracy: 0.842\n",
            "[5,  1500] train_loss: 0.331 train_accuracy: 0.872 test_accuracy: 0.841\n",
            "[5,  1505] train_loss: 0.711 train_accuracy: 0.863 test_accuracy: 0.825\n",
            "[5,  1510] train_loss: 0.564 train_accuracy: 0.864 test_accuracy: 0.830\n",
            "[5,  1515] train_loss: 0.545 train_accuracy: 0.847 test_accuracy: 0.819\n",
            "[5,  1520] train_loss: 0.452 train_accuracy: 0.856 test_accuracy: 0.819\n",
            "[5,  1525] train_loss: 0.260 train_accuracy: 0.874 test_accuracy: 0.837\n",
            "[5,  1530] train_loss: 0.893 train_accuracy: 0.882 test_accuracy: 0.854\n",
            "[5,  1535] train_loss: 0.565 train_accuracy: 0.871 test_accuracy: 0.859\n",
            "[5,  1540] train_loss: 0.505 train_accuracy: 0.867 test_accuracy: 0.857\n",
            "[5,  1545] train_loss: 0.829 train_accuracy: 0.872 test_accuracy: 0.861\n",
            "[5,  1550] train_loss: 0.200 train_accuracy: 0.875 test_accuracy: 0.854\n",
            "[5,  1555] train_loss: 0.524 train_accuracy: 0.878 test_accuracy: 0.848\n",
            "[5,  1560] train_loss: 0.573 train_accuracy: 0.881 test_accuracy: 0.852\n",
            "[5,  1565] train_loss: 0.458 train_accuracy: 0.885 test_accuracy: 0.861\n",
            "[5,  1570] train_loss: 0.562 train_accuracy: 0.870 test_accuracy: 0.858\n",
            "[5,  1575] train_loss: 0.514 train_accuracy: 0.881 test_accuracy: 0.861\n",
            "[5,  1580] train_loss: 0.362 train_accuracy: 0.888 test_accuracy: 0.861\n",
            "[5,  1585] train_loss: 0.490 train_accuracy: 0.889 test_accuracy: 0.858\n",
            "[5,  1590] train_loss: 0.259 train_accuracy: 0.894 test_accuracy: 0.861\n",
            "[5,  1595] train_loss: 0.375 train_accuracy: 0.885 test_accuracy: 0.852\n",
            "[5,  1600] train_loss: 0.420 train_accuracy: 0.871 test_accuracy: 0.832\n",
            "[5,  1605] train_loss: 0.317 train_accuracy: 0.849 test_accuracy: 0.811\n",
            "[5,  1610] train_loss: 0.301 train_accuracy: 0.844 test_accuracy: 0.810\n",
            "[5,  1615] train_loss: 0.367 train_accuracy: 0.869 test_accuracy: 0.832\n",
            "[5,  1620] train_loss: 0.792 train_accuracy: 0.877 test_accuracy: 0.835\n",
            "[5,  1625] train_loss: 0.165 train_accuracy: 0.862 test_accuracy: 0.815\n",
            "[5,  1630] train_loss: 0.295 train_accuracy: 0.851 test_accuracy: 0.803\n",
            "[5,  1635] train_loss: 0.500 train_accuracy: 0.857 test_accuracy: 0.814\n",
            "[5,  1640] train_loss: 0.819 train_accuracy: 0.875 test_accuracy: 0.832\n",
            "[5,  1645] train_loss: 0.331 train_accuracy: 0.867 test_accuracy: 0.824\n",
            "[5,  1650] train_loss: 0.338 train_accuracy: 0.872 test_accuracy: 0.827\n",
            "[5,  1655] train_loss: 0.402 train_accuracy: 0.877 test_accuracy: 0.836\n",
            "[5,  1660] train_loss: 0.517 train_accuracy: 0.868 test_accuracy: 0.830\n",
            "[5,  1665] train_loss: 0.359 train_accuracy: 0.865 test_accuracy: 0.827\n",
            "[5,  1670] train_loss: 0.379 train_accuracy: 0.874 test_accuracy: 0.840\n",
            "[5,  1675] train_loss: 0.499 train_accuracy: 0.880 test_accuracy: 0.848\n",
            "[5,  1680] train_loss: 0.638 train_accuracy: 0.877 test_accuracy: 0.849\n",
            "[5,  1685] train_loss: 0.161 train_accuracy: 0.881 test_accuracy: 0.851\n",
            "[5,  1690] train_loss: 0.493 train_accuracy: 0.886 test_accuracy: 0.865\n",
            "[5,  1695] train_loss: 0.273 train_accuracy: 0.861 test_accuracy: 0.834\n",
            "[5,  1700] train_loss: 0.686 train_accuracy: 0.814 test_accuracy: 0.779\n",
            "[5,  1705] train_loss: 0.398 train_accuracy: 0.776 test_accuracy: 0.742\n",
            "[5,  1710] train_loss: 0.612 train_accuracy: 0.789 test_accuracy: 0.745\n",
            "[5,  1715] train_loss: 0.732 train_accuracy: 0.818 test_accuracy: 0.774\n",
            "[5,  1720] train_loss: 0.716 train_accuracy: 0.861 test_accuracy: 0.827\n",
            "[5,  1725] train_loss: 0.398 train_accuracy: 0.869 test_accuracy: 0.846\n"
          ]
        }
      ],
      "source": [
        "import torch.nn as nn\n",
        "import torch.nn.functional as F\n",
        "from collections import Counter, defaultdict\n",
        "from itertools import combinations\n",
        "import random\n",
        "\n",
        "# Histories collected while training.\n",
        "a_train = []  # Training accuracy history\n",
        "a_test = []  # Test accuracy history\n",
        "lossaaa = []  # Loss history\n",
        "Inf = []  # `Information` value, appended once per training step\n",
        "Var_all = []  # `Variation_all` value, appended once per training step\n",
        "Generalization_Ratio_ = []  # `Generalization_Ratio` value, appended once per training step\n",
        "dicide_action = []  # Decision-action history (sic: 'decide'; presumably referenced under this name elsewhere)\n",
        "# State carried from step to step by the Generalization Decision Process (GDP).\n",
        "loss_before = torch.tensor(30.0)  # Starting value of the carried-over loss\n",
        "los = torch.tensor(30.0)  # Tested against 0.0 by the GDP: > 0 takes the 'Not fitting' branch, otherwise 'Overfitting'\n",
        "dis_before_A1 = torch.tensor(0)  # Running score of action A1 (integer tensor until the GDP rescales it by 3/4 or 1/4)\n",
        "dis_before_A2 = torch.tensor(0)  # Running score of action A2; the GDP favours A1 whenever A1's score >= A2's\n",
        "dis = torch.tensor(1)  # Amount added to / subtracted from the two scores (doubled in the 'Overfitting' branches)\n",
        "per = \"N\"  # Action chosen on the previous step: 'A1', 'A2', or 'N' before the first decision\n",
        "state_before = torch.tensor(0.0)  # Subtracted from the current Generalization_Ratio to obtain state_dis\n",
        "categrary_number = 10  # Number of categories (sic); sets the Information normaliser K = C * (C - 1)\n",
        "tra_val_number = 2  # Number of splits; weight_val_probility is 1.0 / tra_val_number\n",
        "\n",
        "for epoch in range(5):  # Loop over the dataset multiple times\n",
        "    # Fresh iterator each epoch so e3412 batches can be pulled with next() alongside the e1234 batches.\n",
        "    e3412_iter = iter(e3412_loader)\n",
        "    running_loss = 0.0  # Running loss\n",
        "    running_loss_all = 0.0  # Running loss for all\n",
        "    # NOTE(review): torch.autograd.Variable is a deprecated wrapper; these calls only re-wrap the\n",
        "    # existing tensors with requires_grad=False. `Variable` is not imported in this cell, so it\n",
        "    # presumably comes from an earlier one -- confirm before running on a fresh kernel.\n",
        "    loss_before = Variable(loss_before, requires_grad=False)\n",
        "    los = Variable(los, requires_grad=False)\n",
        "    state_before = Variable(state_before, requires_grad=False)\n",
        "    dis_before_A1 = Variable(dis_before_A1, requires_grad=False)\n",
        "    dis_before_A2 = Variable(dis_before_A2, requires_grad=False)\n",
        "    # acc_A1 / acc_A2 are built from the A1 / A2 score tensors, not from an accuracy value.\n",
        "    acc_A1 = Variable(dis_before_A1, requires_grad=False)\n",
        "    acc_A2 = Variable(dis_before_A2, requires_grad=False)\n",
        "    dis_ = Variable(dis, requires_grad=False)  # Wrapped copy of the score increment `dis`\n",
        "\n",
        "    for step, (imgs, labels) in enumerate(e1234_loader):  # Iterate over the data\n",
        "        ### calculate losses\n",
        "        # Two candidate losses are produced per step; the decision process further down\n",
        "        # picks one of them to use as `loss`.\n",
        "        weight_val_probility = 1.0 / tra_val_number  # Equal weight for each of the tra_val_number splits\n",
        "\n",
        "        # Candidate for action A1: loss on the current e1234 batch.\n",
        "        imgs = imgs.cuda()\n",
        "        labels = labels.cuda()\n",
        "        out_e1234 = net(imgs)\n",
        "        loss_out_e1234 = loss_function(out_e1234, labels)\n",
        "\n",
        "        # Candidate for action A2: loss on the next e3412 batch. If e3412_loader yields fewer\n",
        "        # batches than e1234_loader, restart it instead of letting StopIteration abort training.\n",
        "        try:\n",
        "            e3412_imgs, e3412_labels = next(e3412_iter)\n",
        "        except StopIteration:\n",
        "            e3412_iter = iter(e3412_loader)\n",
        "            e3412_imgs, e3412_labels = next(e3412_iter)\n",
        "        e3412_imgs = e3412_imgs.cuda()\n",
        "        e3412_labels = e3412_labels.cuda()\n",
        "        out_e3412 = net(e3412_imgs)\n",
        "        loss_out_e3412 = loss_function(out_e3412, e3412_labels)\n",
        "\n",
        "        #################### extract\n",
        "        # Predicted class index for every image of the two 'extracted' sets. The outputs are only\n",
        "        # used for counting, so the forward passes run under torch.no_grad() rather than building\n",
        "        # an autograd graph that is thrown away immediately.\n",
        "        ## e1 ext count\n",
        "        e12_extracted_loader_image = e12_extracted_loader_image.cuda()\n",
        "        e12_extracted_loader_label = e12_extracted_loader_label.cuda()\n",
        "        with torch.no_grad():\n",
        "            e12_extracted_out = net(e12_extracted_loader_image)\n",
        "        e12_extracted = torch.max(e12_extracted_out, dim=1)[1]  # Index of the largest output per image\n",
        "\n",
        "        ## e2 ext count\n",
        "        e34_extracted_loader_image = e34_extracted_loader_image.cuda()\n",
        "        e34_extracted_loader_label = e34_extracted_loader_label.cuda()\n",
        "        with torch.no_grad():\n",
        "            e34_extracted_out = net(e34_extracted_loader_image)\n",
        "        e34_extracted = torch.max(e34_extracted_out, dim=1)[1]  # Index of the largest output per image\n",
        "\n",
        "        ########### e1 count\n",
        "        # Predicted classes for the e12 sample set, then one 10-bin histogram per chunk.\n",
        "        e12_s_loader_image = e12_s_loader_image.cuda()\n",
        "        e12_s_loader_label = e12_s_loader_label.cuda()\n",
        "        with torch.no_grad():  # Predictions are only counted, no gradient is needed\n",
        "            e12_inform_out = net(e12_s_loader_image)\n",
        "        e12_inform = torch.max(e12_inform_out, dim=1)[1]  # Index of the largest output per image\n",
        "        split_e12_inform = list(torch.chunk(e12_inform, 10))  # Up to 10 consecutive chunks\n",
        "\n",
        "        for i, chunk in enumerate(split_e12_inform):\n",
        "            counts_E12 = torch.bincount(chunk, minlength=10)\n",
        "            # Copy the first 10 bins into a default (float) tensor in a single transfer\n",
        "            # instead of element by element.\n",
        "            counts_e12 = torch.zeros(10).copy_(counts_E12[:10])\n",
        "            split_e12_inform[i] = counts_e12  # Replace the chunk with its histogram\n",
        "\n",
        "        ########### e2 count\n",
        "        # Predicted classes for the e34 sample set, then one 10-bin histogram per chunk.\n",
        "        e34_s_loader_image = e34_s_loader_image.cuda()\n",
        "        e34_s_loader_label = e34_s_loader_label.cuda()\n",
        "        with torch.no_grad():  # Predictions are only counted, no gradient is needed\n",
        "            e34_inform_out = net(e34_s_loader_image)\n",
        "        e34_inform = torch.max(e34_inform_out, dim=1)[1]  # Index of the largest output per image\n",
        "        split_e34_inform = list(torch.chunk(e34_inform, 10))  # Up to 10 consecutive chunks\n",
        "\n",
        "        for i, chunk in enumerate(split_e34_inform):\n",
        "            counts_E34 = torch.bincount(chunk, minlength=10)\n",
        "            # Copy the first 10 bins into a default (float) tensor in a single transfer\n",
        "            # instead of element by element.\n",
        "            counts_e34 = torch.zeros(10).copy_(counts_E34[:10])\n",
        "            split_e34_inform[i] = counts_e34  # Replace the chunk with its histogram\n",
        "\n",
        "        ##########\n",
        "        ## IN_IN_pro\n",
        "        # Predicted-class frequencies over the whole e12 / e34 sample sets. The first 10 bins of\n",
        "        # each histogram are copied into a default (float) tensor with one copy_ call rather\n",
        "        # than one element at a time.\n",
        "        counts_i_12 = torch.bincount(e12_inform, minlength=10)\n",
        "        counts_i_34 = torch.bincount(e34_inform, minlength=10)\n",
        "        counts_in_12 = torch.zeros(10).copy_(counts_i_12[:10])\n",
        "        counts_in_34 = torch.zeros(10).copy_(counts_i_34[:10])\n",
        "        total_samples_in_12 = len(e12_inform)\n",
        "        total_samples_in_34 = len(e34_inform)\n",
        "\n",
        "        # The denominator is inflated by a factor of (1 + 1e-6), as in the original formula.\n",
        "        min_denominator_in_12 = total_samples_in_12 * 1e-6\n",
        "        probabilities_in_12 = counts_in_12 / (total_samples_in_12 + min_denominator_in_12)\n",
        "        probabilities_tensor_in_12 = probabilities_in_12.unsqueeze(0)  # Add a leading dimension\n",
        "\n",
        "        min_denominator_in_34 = total_samples_in_34 * 1e-6\n",
        "        probabilities_in_34 = counts_in_34 / (total_samples_in_34 + min_denominator_in_34)\n",
        "        probabilities_tensor_in_34 = probabilities_in_34.unsqueeze(0)  # Add a leading dimension\n",
        "\n",
        "        # Same computation for the predictions on the two extracted sets.\n",
        "        counts_ext_12 = torch.bincount(e12_extracted, minlength=10)\n",
        "        counts_ext_34 = torch.bincount(e34_extracted, minlength=10)\n",
        "        counts_extra_12 = torch.zeros(10).copy_(counts_ext_12[:10])\n",
        "        counts_extra_34 = torch.zeros(10).copy_(counts_ext_34[:10])\n",
        "        total_extracted_in_12 = len(e12_extracted)\n",
        "        total_extracted_in_34 = len(e34_extracted)\n",
        "\n",
        "        min_denominator_ext_12 = total_extracted_in_12 * 1e-6\n",
        "        probabilities_ext_12 = counts_extra_12 / (total_extracted_in_12 + min_denominator_ext_12)\n",
        "        probabilities_tensor_extra_12 = probabilities_ext_12.unsqueeze(0)  # Add a leading dimension\n",
        "\n",
        "        min_denominator_ext_34 = total_extracted_in_34 * 1e-6\n",
        "        probabilities_ext_34 = counts_extra_34 / (total_extracted_in_34 + min_denominator_ext_34)\n",
        "        probabilities_tensor_extra_34 = probabilities_ext_34.unsqueeze(0)  # Add a leading dimension\n",
        "\n",
        "        ############################### Variation x ###################################\n",
        "        # Each class probability is renormalised across the four distributions (e12 / e34 sample\n",
        "        # sets and e12 / e34 extracted sets). Variation_all is the largest absolute per-class\n",
        "        # p * log(p / q) term between the e12 sample distribution and either extracted one.\n",
        "        # The 1e-30 terms keep the divisions and logarithms finite.\n",
        "        in_12_all = probabilities_tensor_in_12\n",
        "        in_34_all = probabilities_tensor_in_34\n",
        "        in_12_extra = probabilities_tensor_extra_12\n",
        "        in_34_extra = probabilities_tensor_extra_34\n",
        "        weight_tra_probility_all = 1.0 / 2\n",
        "        dow_all = (\n",
        "            in_12_all * weight_tra_probility_all + in_34_all * weight_tra_probility_all\n",
        "            + in_12_extra * weight_tra_probility_all + in_34_extra * weight_tra_probility_all + 1e-30\n",
        "        )\n",
        "        # [0] drops the leading dimension added by unsqueeze(0), leaving one value per class.\n",
        "        in_1_all = ((in_12_all * weight_tra_probility_all) / dow_all)[0]\n",
        "        in_2_all = ((in_34_all * weight_tra_probility_all) / dow_all)[0]\n",
        "        e_1_all = ((in_12_extra * weight_tra_probility_all) / dow_all)[0]\n",
        "        e_2_all = ((in_34_extra * weight_tra_probility_all) / dow_all)[0]\n",
        "        # NOTE(review): both terms below start from in_1_all; in_2_all is not used in this\n",
        "        # section -- confirm that this is intended.\n",
        "        k_divergence_all = (in_1_all + 1e-30) * torch.log(in_1_all / (e_1_all + 1e-30) + 1e-30)\n",
        "        k_divergence_all_ = (in_1_all + 1e-30) * torch.log(in_1_all / (e_2_all + 1e-30) + 1e-30)\n",
        "        d_KL_all = torch.max(k_divergence_all.abs())\n",
        "        d_KL_all_ = torch.max(k_divergence_all_.abs())\n",
        "        Variation_all = torch.max(d_KL_all, d_KL_all_)\n",
        "        Var_all.append(Variation_all)\n",
        "\n",
        "        ############################ Information ###################################\n",
        "        # For every ordered pair of classes the term |p_a * log(p_a / p_b)|, with p = count / 10,\n",
        "        # is evaluated on the e12 and on the e34 histogram of chunk c and the smaller of the two\n",
        "        # is stored. Slots [0, 45) hold the pairs (i, j); slots [45, 90) hold the reversed pairs (j, i).\n",
        "        # Information is the sum of all slots divided by K = categrary_number * (categrary_number - 1).\n",
        "        # NOTE(review): result_tensor is indexed by pair only, so every pass of the `c` loop\n",
        "        # overwrites the previous one and only the last chunk (c = 9) contributes to Information.\n",
        "        # Confirm whether accumulating over the chunks was intended.\n",
        "        # NOTE(review): the division by 10 presumably assumes 10 samples per chunk, and range(10)\n",
        "        # assumes torch.chunk returned 10 chunks -- TODO confirm.\n",
        "        all_combinations = list(combinations(range(10), 2))\n",
        "        K = categrary_number * (categrary_number - 1)\n",
        "        result_tensor = torch.zeros(len(all_combinations) * 2)  # One slot per ordered class pair\n",
        "        for c in range(10):\n",
        "            for idx, (i, j) in enumerate(all_combinations):\n",
        "                # Pair (i, j): smaller of the e12 and e34 terms.\n",
        "                s1 = abs(((split_e12_inform[c][i]/10) + 1e-30) * torch.log((split_e12_inform[c][i]/10) / ((split_e12_inform[c][j]/10) + 1e-30) + 1e-30))\n",
        "                s3 = abs(((split_e34_inform[c][i]/10) + 1e-30) * torch.log((split_e34_inform[c][i]/10) / ((split_e34_inform[c][j]/10) + 1e-30) + 1e-30))\n",
        "                min_value = torch.min(s1, s3)\n",
        "                result_tensor[idx] = min_value.item()\n",
        "                # Reversed pair (j, i), stored in the second half of result_tensor.\n",
        "                idx_ = idx + len(all_combinations)\n",
        "                s1_ = abs(((split_e12_inform[c][j]/10) + 1e-30) * torch.log((split_e12_inform[c][j]/10) / ((split_e12_inform[c][i]/10) + 1e-30) + 1e-30))\n",
        "                s3_ = abs(((split_e34_inform[c][j]/10) + 1e-30) * torch.log((split_e34_inform[c][j]/10) / ((split_e34_inform[c][i]/10) + 1e-30) + 1e-30))\n",
        "                min_value_ = torch.min(s1_, s3_)\n",
        "                result_tensor[idx_] = min_value_.item()\n",
        "        Information = torch.sum(result_tensor) / K\n",
        "        Inf.append(Information)\n",
        "\n",
        "        ############################ Generalization_Ratio ###################################\n",
        "        # Variation scaled by (Information + 1) / Information; one value is recorded per step.\n",
        "        # NOTE(review): Information == 0 makes this inf or nan -- confirm that cannot happen.\n",
        "        Generalization_Ratio = Variation_all * (Information + 1.0) / Information\n",
        "        Generalization_Ratio_.append(Generalization_Ratio)\n",
        "\n",
        "        ############################ Generalization Decision Process (GDP) ###################################\n",
        "        # Move the carried-over tensors to the GPU before they are used.\n",
        "        state_before = state_before.cuda()\n",
        "        loss_before = loss_before.cuda()\n",
        "        # Change of the generalization ratio relative to the stored state; its sign selects the GDP branch.\n",
        "        state_now = Generalization_Ratio\n",
        "        state_dis = state_now - state_before\n",
        "        # The two candidate losses stacked along a new first dimension:\n",
        "        # index 0 is the e1234 loss, index 1 the e3412 loss.\n",
        "        result_tensor = torch.stack((loss_out_e1234, loss_out_e3412), dim=0)\n",
        "\n",
        "        # Because two datasets loss_out_e1234 and loss_out_e3412 are used, and there are actions A1 and A2,\n",
        "        # choose one based on the reward\n",
        "        if state_dis >= 0.0:\n",
        "            if los > 0.0:  # Not fitting\n",
        "                if per == \"A1\":\n",
        "                    dis_before_A1 += dis\n",
        "                    dis_before_A2 -= dis\n",
        "                    if dis_before_A1 >= dis_before_A2:\n",
        "                        loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "                        per = \"A1\"\n",
        "                    else:\n",
        "                        loss = random.choices(result_tensor, weights=[1, 3])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (1 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (3 / 4)\n",
        "                        per = \"A2\"\n",
        "                elif per == \"A2\":\n",
        "                    dis_before_A1 -= dis\n",
        "                    dis_before_A2 += dis\n",
        "                    if dis_before_A1 >= dis_before_A2:\n",
        "                        loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "                        per = \"A1\"\n",
        "                    else:\n",
        "                        loss = random.choices(result_tensor, weights=[1, 3])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (1 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (3 / 4)\n",
        "                        per = \"A2\"\n",
        "                else:\n",
        "                    loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "                    dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "                    dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "                    per = \"A1\"\n",
        "            else:  # Overfitting\n",
        "                if per == \"A1\":\n",
        "                    dis_before_A1 -= dis * 2\n",
        "                    dis_before_A2 += dis * 2\n",
        "                    if dis_before_A1 >= dis_before_A2:\n",
        "                        loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "                        per = \"A1\"\n",
        "                    else:\n",
        "                        loss = random.choices(result_tensor, weights=[1, 3])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (1 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (3 / 4)\n",
        "                        per = \"A2\"\n",
        "                elif per == \"A2\":\n",
        "                    dis_before_A1 += dis * 2\n",
        "                    dis_before_A2 -= dis * 2\n",
        "                    if dis_before_A1 >= dis_before_A2:\n",
        "                        loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "                        per = \"A1\"\n",
        "                    else:\n",
        "                        loss = random.choices(result_tensor, weights=[1, 3])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (1 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (3 / 4)\n",
        "                        per = \"A2\"\n",
        "                else:\n",
        "                    loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "                    dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "                    dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "                    per = \"A1\"\n",
        "        else:\n",
        "            if los > 0.0:  # Not fitting\n",
        "                if per == \"A1\":\n",
        "                    dis_before_A1 -= dis\n",
        "                    dis_before_A2 += dis\n",
        "                    if dis_before_A1 >= dis_before_A2:\n",
        "                        loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "                        per = \"A1\"\n",
        "                    else:\n",
        "                        loss = random.choices(result_tensor, weights=[1, 3])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (1 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (3 / 4)\n",
        "                        per = \"A2\"\n",
        "                elif per == \"A2\":\n",
        "                    dis_before_A1 += dis\n",
        "                    dis_before_A2 -= dis\n",
        "                    if dis_before_A1 >= dis_before_A2:\n",
        "                        loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "                        per = \"A1\"\n",
        "                    else:\n",
        "                        loss = random.choices(result_tensor, weights=[1, 3])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (1 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (3 / 4)\n",
        "                        per = \"A2\"\n",
        "                else:\n",
        "                    loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "                    dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "                    dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "                    per = \"A1\"\n",
        "            else:  # Overfitting\n",
        "                if per == \"A1\":\n",
        "                    dis_before_A1 += dis * 2\n",
        "                    dis_before_A2 -= dis * 2\n",
        "                    if dis_before_A1 >= dis_before_A2:\n",
        "                        loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "                        per = \"A1\"\n",
        "                    else:\n",
        "                        loss = random.choices(result_tensor, weights=[1, 3])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (1 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (3 / 4)\n",
        "                        per = \"A2\"\n",
        "                elif per == \"A2\":\n",
        "                    dis_before_A1 -= dis * 2\n",
        "                    dis_before_A2 += dis * 2\n",
        "                    if dis_before_A1 >= dis_before_A2:\n",
        "                        loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "                        per = \"A1\"\n",
        "                    else:\n",
        "                        loss = random.choices(result_tensor, weights=[1, 3])[0]\n",
        "                        dis_before_A1 = dis_before_A1 * (1 / 4)\n",
        "                        dis_before_A2 = dis_before_A2 * (3 / 4)\n",
        "                        per = \"A2\"\n",
        "                else:\n",
        "                    loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "                    dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "                    dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "                    per = \"A1\"\n",
        "\n",
        "        ####################################\n",
        "        #### optimizer\n",
        "        los = loss - loss_before\n",
        "        state_before = state_now\n",
        "        optimizer_L.zero_grad()\n",
        "        loss.backward()\n",
        "        optimizer_L.step()\n",
        "        loss_before = loss\n",
        "        running_loss += loss.item()\n",
        "\n",
        "        # Print statistics\n",
        "        if step % 5 == 4:  # Print every 500 mini-batches\n",
        "            with torch.no_grad():  # 'with' is a context manager\n",
        "                s_test_image = s_test_image.cuda()\n",
        "                s_test_label = s_test_label.cuda()\n",
        "                s_tra_image = s_tra_image.cuda()\n",
        "                s_tra_label = s_tra_label.cuda()\n",
        "                outputs = net(s_test_image)  # [batch, 10]\n",
        "                predict_y = torch.max(outputs, dim=1)[1]\n",
        "                accuracy = torch.eq(predict_y, s_test_label).sum().item() / s_test_label.size(0)\n",
        "                a_test.append(float(accuracy))\n",
        "                outputs_t = net(s_tra_image)  # [batch, 10]\n",
        "                predict_y_t = torch.max(outputs_t, dim=1)[1]\n",
        "                accuracy_t = torch.eq(predict_y_t, s_tra_label).sum().item() / s_tra_label.size(0)\n",
        "                a_train.append(float(accuracy_t))\n",
        "                lossaaa.append(float(running_loss / 5))\n",
        "                print('[%d, %5d] train_loss: %.3f train_accuracy: %.3f test_accuracy: %.3f' %\n",
        "                      (epoch + 1, step + 1, running_loss / 5, accuracy_t, accuracy))\n",
        "                running_loss = 0.0\n",
        "                running_loss = 0.0"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "piHuwdWfQAXH"
      },
      "outputs": [],
      "source": [
        "# Writing training accuracy to file\n",
        "with open('Accuracy_a_train.txt', 'w') as file:\n",
        "    for i in range(len(a_train)):\n",
        "        file.write(str(a_train[i]))\n",
        "        if i < len(a_train) - 1:\n",
        "            file.write(', ')\n",
        "\n",
        "# Writing test accuracy to file\n",
        "with open('Accuracy_a_test.txt', 'w') as file:\n",
        "    for i in range(len(a_test)):\n",
        "        file.write(str(a_test[i]))\n",
        "        if i < len(a_test) - 1:\n",
        "            file.write(', ')\n",
        "\n",
        "# Writing loss values to file\n",
        "with open('Accuracy_loss.txt', 'w') as file:\n",
        "    for i in range(len(lossaaa)):\n",
        "        file.write(str(lossaaa[i]))\n",
        "        if i < len(lossaaa) - 1:\n",
        "            file.write(', ')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "oUWFtqFCQHZq"
      },
      "outputs": [],
      "source": [
        "# Convert tensors to lists\n",
        "Inf_list = [tensor.item() for tensor in Inf]\n",
        "Var_av_list = [tensor.item() for tensor in Var_all]\n",
        "Generalization_Ratio_list = [tensor.item() for tensor in Generalization_Ratio_]\n",
        "\n",
        "# Writing Inf_list to file\n",
        "with open('Inf_OOD.txt', 'w') as file:\n",
        "    for i in range(len(Inf_list)):\n",
        "        file.write(str(Inf_list[i]))\n",
        "        if i < len(Inf_list) - 1:\n",
        "            file.write(', ')\n",
        "\n",
        "# Writing Var_av_list to file\n",
        "with open('Var_all_OOD.txt', 'w') as file:\n",
        "    for i in range(len(Var_av_list)):\n",
        "        file.write(str(Var_av_list[i]))\n",
        "        if i < len(Var_av_list) - 1:\n",
        "            file.write(', ')\n",
        "\n",
        "# Writing Generalization_Ratio_list to file\n",
        "with open('Generalization_Ratio_list.txt', 'w') as file:\n",
        "    for i in range(len(Generalization_Ratio_list)):\n",
        "        file.write(str(Generalization_Ratio_list[i]))\n",
        "        if i < len(Generalization_Ratio_list) - 1:\n",
        "            file.write(', ')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "hNz_J0vUQJNG"
      },
      "outputs": [],
      "source": [
        "import time\n",
        "import os  # Import the os module\n",
        "\n",
        "os.makedirs(os.path.join('./Models/'), exist_ok=True)  # Create the directory './Models/' if it doesn't exist\n",
        "model_path = './Models/'  # Define the model path\n",
        "\n",
        "rq = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))  # Get the current time in the format 'YYYYMMDDHHMM'\n",
        "\n",
        "# Save the training results\n",
        "current_model_path = model_path + rq + \"_model.pkl\"  # Create the full path for the model file with the current timestamp\n",
        "torch.save(net, current_model_path)  # Save the model to the specified path\n",
        "print(\"Saved model file: \" + current_model_path)  # Print the path of the saved model file"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "637Nw0irQLQB",
        "outputId": "869239c0-a26d-46ef-eaad-a4c787665f27"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "train: 0.9044708545557442\n",
            "test: 0.8737259343148358\n",
            "loss: 0.0690862538293004\n"
          ]
        }
      ],
      "source": [
        "# Display the highest value that appears during training\n",
        "print(\"train:\", max(a_train))  # Print the highest value in the training data\n",
        "print(\"test:\", max(a_test))  # Print the highest value in the test data\n",
        "print(\"loss:\", min(lossaaa))  # Print the minimum value of the loss"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "uiRPWte9QMIt",
        "outputId": "32dd89aa-5d86-40f5-cbbb-7217591deaec"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "train: 0.8672706681766704\n",
            "test: 0.8431\n"
          ]
        }
      ],
      "source": [
        "# Load training and testing datasets from specified directories and apply transformations\n",
        "trainset = datasets.ImageFolder(root='/content/colorized-MNIST/training', transform=transform)\n",
        "testset = datasets.ImageFolder(root='/content/colorized-MNIST/testing', transform=transform)\n",
        "# Create DataLoader for training and testing datasets with specified batch sizes and other parameters\n",
        "trainloader = torch.utils.data.DataLoader(trainset, batch_size=8830, shuffle=True, num_workers=0)\n",
        "t_loader = torch.utils.data.DataLoader(testset, batch_size=10000, shuffle=False, num_workers=0)\n",
        "# Get an iterator for the training DataLoader\n",
        "trainloader_iter = iter(trainloader)\n",
        "# Get the next batch of images and labels from the training DataLoader\n",
        "tl_image, tl_label = next(trainloader_iter)\n",
        "# Move the training images and labels to the GPU\n",
        "tl_image = tl_image.cuda()\n",
        "tl_label = tl_label.cuda()\n",
        "# Pass the training images through the neural network to get the outputs\n",
        "tl_imageoutputs = net(tl_image)  # [batch, 10]\n",
        "# Get the predicted labels by finding the index of the maximum value in the output tensor\n",
        "predict_y = torch.max(tl_imageoutputs, dim=1)[1]\n",
        "# Calculate the accuracy of the predictions compared to the true labels\n",
        "accuracy = torch.eq(predict_y, tl_label).sum().item() / tl_label.size(0)\n",
        "# Print the training accuracy\n",
        "print(\"train:\", float(accuracy))\n",
        "# Get an iterator for the testing DataLoader\n",
        "t_data_iter = iter(t_loader)\n",
        "# Get the next batch of images and labels from the testing DataLoader\n",
        "t_image, t_label = next(t_data_iter)\n",
        "# Move the testing images and labels to the GPU\n",
        "t_image = t_image.cuda()\n",
        "t_label = t_label.cuda()\n",
        "# Pass the testing images through the neural network to get the outputs\n",
        "t_imageoutputs = net(t_image)  # [batch, 10]\n",
        "# Get the predicted labels by finding the index of the maximum value in the output tensor\n",
        "predict = torch.max(t_imageoutputs, dim=1)[1]\n",
        "# Calculate the accuracy of the predictions compared to the true labels\n",
        "accuracy_t = torch.eq(predict, t_label).sum().item() / t_label.size(0)\n",
        "# Print the testing accuracy\n",
        "print(\"test:\", float(accuracy_t))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "3kE8mPl3Ubq1"
      },
      "source": [
        "### **Network1 : use only linear layers (not use GDP)**"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "eyfSAImBTQSq"
      },
      "outputs": [],
      "source": [
        "import argparse\n",
        "import os\n",
        "import numpy as np\n",
        "import torchvision.transforms as transforms\n",
        "from torchvision.utils import save_image\n",
        "from torch.utils.data import DataLoader\n",
        "from torchvision import datasets\n",
        "from torch.autograd import Variable\n",
        "import torch.nn as nn\n",
        "import torch\n",
        "\n",
        "class LLNet(nn.Module):  # Use only linear layers\n",
        "    def __init__(self):\n",
        "        super(LLNet, self).__init__()\n",
        "\n",
        "        self.fc1 = nn.Linear(3*28*28, 1000)  # First fully connected layer\n",
        "        self.fc2 = nn.Linear(1000, 500)      # Second fully connected layer\n",
        "        self.fc3 = nn.Linear(500, 100)       # Third fully connected layer\n",
        "        self.fc4 = nn.Linear(100, 50)        # Fourth fully connected layer\n",
        "        self.fc5 = nn.Linear(50, 25)         # Fifth fully connected layer\n",
        "        self.fc6 = nn.Linear(25, 20)         # Sixth fully connected layer\n",
        "        self.fc7 = nn.Linear(20, 10)         # Seventh fully connected layer (output layer)\n",
        "\n",
        "    def forward(self, x):\n",
        "        x = x.view(-1, 3*28*28)  # Flatten the input image\n",
        "        x = self.fc1(x)  # Apply first fully connected layer\n",
        "        x = self.fc2(x)  # Apply second fully connected layer\n",
        "        x = self.fc3(x)  # Apply third fully connected layer\n",
        "        x = self.fc4(x)  # Apply fourth fully connected layer\n",
        "        x = self.fc5(x)  # Apply fifth fully connected layer\n",
        "        x = self.fc6(x)  # Apply sixth fully connected layer\n",
        "        x = self.fc7(x)  # Apply seventh fully connected layer (output)\n",
        "        return x\n",
        "\n",
        "loss_function = torch.nn.CrossEntropyLoss()  # Define the loss function\n",
        "net = LLNet()  # Instantiate the network\n",
        "\n",
        "# If a GPU is available, run everything in CUDA mode\n",
        "if torch.cuda.is_available():\n",
        "    net = net.cuda()  # Move the network to GPU\n",
        "    loss_function = loss_function.cuda()  # Move the loss function to GPU\n",
        "\n",
        "optimizer_L = torch.optim.Adam(net.parameters(), lr=0.001)  # Define the optimizer with a learning rate of 0.001"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "rY0q1qBWougF",
        "outputId": "472b734d-f8d5-4a99-c905-f8d427b9b23f"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[1,     5] train_loss: 2.645 train_accuracy: 0.105 test_accuracy: 0.081\n",
            "[1,    10] train_loss: 2.569 train_accuracy: 0.101 test_accuracy: 0.075\n",
            "[1,    15] train_loss: 2.368 train_accuracy: 0.168 test_accuracy: 0.154\n",
            "[1,    20] train_loss: 2.345 train_accuracy: 0.157 test_accuracy: 0.185\n",
            "[1,    25] train_loss: 2.315 train_accuracy: 0.249 test_accuracy: 0.276\n",
            "[1,    30] train_loss: 2.027 train_accuracy: 0.233 test_accuracy: 0.231\n",
            "[1,    35] train_loss: 2.402 train_accuracy: 0.253 test_accuracy: 0.278\n",
            "[1,    40] train_loss: 1.828 train_accuracy: 0.263 test_accuracy: 0.290\n",
            "[1,    45] train_loss: 1.846 train_accuracy: 0.375 test_accuracy: 0.400\n",
            "[1,    50] train_loss: 1.544 train_accuracy: 0.395 test_accuracy: 0.387\n",
            "[1,    55] train_loss: 2.069 train_accuracy: 0.315 test_accuracy: 0.348\n",
            "[1,    60] train_loss: 1.917 train_accuracy: 0.309 test_accuracy: 0.353\n",
            "[1,    65] train_loss: 1.813 train_accuracy: 0.283 test_accuracy: 0.322\n",
            "[1,    70] train_loss: 1.912 train_accuracy: 0.342 test_accuracy: 0.373\n",
            "[1,    75] train_loss: 2.290 train_accuracy: 0.339 test_accuracy: 0.361\n",
            "[1,    80] train_loss: 1.877 train_accuracy: 0.362 test_accuracy: 0.394\n",
            "[1,    85] train_loss: 2.445 train_accuracy: 0.449 test_accuracy: 0.485\n",
            "[1,    90] train_loss: 1.802 train_accuracy: 0.484 test_accuracy: 0.506\n",
            "[1,    95] train_loss: 1.494 train_accuracy: 0.504 test_accuracy: 0.536\n",
            "[1,   100] train_loss: 1.517 train_accuracy: 0.500 test_accuracy: 0.534\n",
            "[1,   105] train_loss: 1.787 train_accuracy: 0.533 test_accuracy: 0.516\n",
            "[1,   110] train_loss: 1.545 train_accuracy: 0.536 test_accuracy: 0.514\n",
            "[1,   115] train_loss: 1.036 train_accuracy: 0.526 test_accuracy: 0.537\n",
            "[1,   120] train_loss: 1.309 train_accuracy: 0.513 test_accuracy: 0.549\n",
            "[1,   125] train_loss: 0.942 train_accuracy: 0.463 test_accuracy: 0.468\n",
            "[1,   130] train_loss: 1.776 train_accuracy: 0.518 test_accuracy: 0.470\n",
            "[1,   135] train_loss: 1.319 train_accuracy: 0.497 test_accuracy: 0.502\n",
            "[1,   140] train_loss: 1.389 train_accuracy: 0.541 test_accuracy: 0.575\n",
            "[1,   145] train_loss: 1.067 train_accuracy: 0.540 test_accuracy: 0.591\n",
            "[1,   150] train_loss: 1.724 train_accuracy: 0.552 test_accuracy: 0.606\n",
            "[1,   155] train_loss: 1.407 train_accuracy: 0.530 test_accuracy: 0.582\n",
            "[1,   160] train_loss: 1.501 train_accuracy: 0.575 test_accuracy: 0.612\n",
            "[1,   165] train_loss: 1.211 train_accuracy: 0.525 test_accuracy: 0.558\n",
            "[1,   170] train_loss: 1.417 train_accuracy: 0.601 test_accuracy: 0.633\n",
            "[1,   175] train_loss: 0.912 train_accuracy: 0.455 test_accuracy: 0.482\n",
            "[1,   180] train_loss: 1.548 train_accuracy: 0.524 test_accuracy: 0.591\n",
            "[1,   185] train_loss: 0.973 train_accuracy: 0.537 test_accuracy: 0.601\n",
            "[1,   190] train_loss: 1.124 train_accuracy: 0.569 test_accuracy: 0.623\n",
            "[1,   195] train_loss: 1.399 train_accuracy: 0.484 test_accuracy: 0.498\n",
            "[1,   200] train_loss: 1.624 train_accuracy: 0.533 test_accuracy: 0.558\n",
            "[1,   205] train_loss: 1.663 train_accuracy: 0.544 test_accuracy: 0.549\n",
            "[1,   210] train_loss: 1.248 train_accuracy: 0.603 test_accuracy: 0.622\n",
            "[1,   215] train_loss: 1.189 train_accuracy: 0.550 test_accuracy: 0.589\n",
            "[1,   220] train_loss: 1.658 train_accuracy: 0.536 test_accuracy: 0.584\n",
            "[1,   225] train_loss: 1.346 train_accuracy: 0.562 test_accuracy: 0.613\n",
            "[1,   230] train_loss: 1.616 train_accuracy: 0.578 test_accuracy: 0.627\n",
            "[1,   235] train_loss: 1.178 train_accuracy: 0.609 test_accuracy: 0.659\n",
            "[1,   240] train_loss: 1.013 train_accuracy: 0.667 test_accuracy: 0.705\n",
            "[1,   245] train_loss: 1.115 train_accuracy: 0.617 test_accuracy: 0.641\n",
            "[1,   250] train_loss: 1.220 train_accuracy: 0.641 test_accuracy: 0.662\n",
            "[1,   255] train_loss: 1.169 train_accuracy: 0.570 test_accuracy: 0.597\n",
            "[1,   260] train_loss: 0.836 train_accuracy: 0.626 test_accuracy: 0.660\n",
            "[1,   265] train_loss: 1.313 train_accuracy: 0.645 test_accuracy: 0.682\n",
            "[1,   270] train_loss: 1.031 train_accuracy: 0.650 test_accuracy: 0.671\n",
            "[1,   275] train_loss: 1.135 train_accuracy: 0.674 test_accuracy: 0.674\n",
            "[1,   280] train_loss: 1.058 train_accuracy: 0.704 test_accuracy: 0.696\n",
            "[1,   285] train_loss: 0.880 train_accuracy: 0.640 test_accuracy: 0.678\n",
            "[1,   290] train_loss: 1.154 train_accuracy: 0.585 test_accuracy: 0.634\n",
            "[1,   295] train_loss: 1.396 train_accuracy: 0.622 test_accuracy: 0.644\n",
            "[1,   300] train_loss: 0.735 train_accuracy: 0.576 test_accuracy: 0.552\n",
            "[1,   305] train_loss: 0.892 train_accuracy: 0.583 test_accuracy: 0.568\n",
            "[1,   310] train_loss: 1.385 train_accuracy: 0.679 test_accuracy: 0.690\n",
            "[1,   315] train_loss: 0.991 train_accuracy: 0.620 test_accuracy: 0.623\n",
            "[1,   320] train_loss: 1.302 train_accuracy: 0.643 test_accuracy: 0.652\n",
            "[1,   325] train_loss: 0.868 train_accuracy: 0.670 test_accuracy: 0.673\n",
            "[1,   330] train_loss: 1.172 train_accuracy: 0.661 test_accuracy: 0.652\n",
            "[1,   335] train_loss: 0.941 train_accuracy: 0.718 test_accuracy: 0.737\n",
            "[1,   340] train_loss: 0.473 train_accuracy: 0.609 test_accuracy: 0.658\n",
            "[1,   345] train_loss: 1.224 train_accuracy: 0.610 test_accuracy: 0.640\n",
            "[1,   350] train_loss: 0.954 train_accuracy: 0.667 test_accuracy: 0.655\n",
            "[1,   355] train_loss: 1.068 train_accuracy: 0.637 test_accuracy: 0.591\n",
            "[1,   360] train_loss: 1.432 train_accuracy: 0.632 test_accuracy: 0.604\n",
            "[1,   365] train_loss: 1.532 train_accuracy: 0.557 test_accuracy: 0.537\n",
            "[1,   370] train_loss: 1.643 train_accuracy: 0.658 test_accuracy: 0.685\n",
            "[1,   375] train_loss: 0.920 train_accuracy: 0.567 test_accuracy: 0.578\n",
            "[1,   380] train_loss: 1.209 train_accuracy: 0.526 test_accuracy: 0.529\n",
            "[1,   385] train_loss: 1.539 train_accuracy: 0.554 test_accuracy: 0.513\n",
            "[1,   390] train_loss: 1.320 train_accuracy: 0.602 test_accuracy: 0.584\n",
            "[1,   395] train_loss: 1.040 train_accuracy: 0.578 test_accuracy: 0.614\n",
            "[1,   400] train_loss: 1.174 train_accuracy: 0.612 test_accuracy: 0.651\n",
            "[1,   405] train_loss: 1.017 train_accuracy: 0.649 test_accuracy: 0.684\n",
            "[1,   410] train_loss: 1.077 train_accuracy: 0.626 test_accuracy: 0.678\n",
            "[1,   415] train_loss: 1.296 train_accuracy: 0.635 test_accuracy: 0.695\n",
            "[1,   420] train_loss: 0.760 train_accuracy: 0.623 test_accuracy: 0.664\n",
            "[1,   425] train_loss: 0.879 train_accuracy: 0.599 test_accuracy: 0.624\n",
            "[1,   430] train_loss: 1.101 train_accuracy: 0.676 test_accuracy: 0.671\n",
            "[1,   435] train_loss: 0.667 train_accuracy: 0.674 test_accuracy: 0.640\n",
            "[1,   440] train_loss: 0.634 train_accuracy: 0.634 test_accuracy: 0.598\n",
            "[1,   445] train_loss: 0.854 train_accuracy: 0.591 test_accuracy: 0.547\n",
            "[1,   450] train_loss: 0.924 train_accuracy: 0.626 test_accuracy: 0.602\n",
            "[1,   455] train_loss: 0.985 train_accuracy: 0.657 test_accuracy: 0.710\n",
            "[1,   460] train_loss: 1.239 train_accuracy: 0.655 test_accuracy: 0.712\n",
            "[1,   465] train_loss: 1.715 train_accuracy: 0.621 test_accuracy: 0.687\n",
            "[1,   470] train_loss: 1.828 train_accuracy: 0.672 test_accuracy: 0.704\n",
            "[1,   475] train_loss: 0.658 train_accuracy: 0.586 test_accuracy: 0.596\n",
            "[1,   480] train_loss: 1.165 train_accuracy: 0.602 test_accuracy: 0.635\n",
            "[1,   485] train_loss: 0.767 train_accuracy: 0.545 test_accuracy: 0.595\n",
            "[1,   490] train_loss: 1.049 train_accuracy: 0.585 test_accuracy: 0.617\n",
            "[1,   495] train_loss: 0.679 train_accuracy: 0.616 test_accuracy: 0.614\n",
            "[1,   500] train_loss: 1.387 train_accuracy: 0.648 test_accuracy: 0.630\n",
            "[1,   505] train_loss: 0.900 train_accuracy: 0.692 test_accuracy: 0.699\n",
            "[1,   510] train_loss: 0.998 train_accuracy: 0.713 test_accuracy: 0.705\n",
            "[1,   515] train_loss: 0.812 train_accuracy: 0.678 test_accuracy: 0.653\n",
            "[1,   520] train_loss: 1.053 train_accuracy: 0.643 test_accuracy: 0.639\n",
            "[1,   525] train_loss: 1.205 train_accuracy: 0.700 test_accuracy: 0.710\n",
            "[1,   530] train_loss: 0.800 train_accuracy: 0.737 test_accuracy: 0.752\n",
            "[1,   535] train_loss: 1.023 train_accuracy: 0.717 test_accuracy: 0.757\n",
            "[1,   540] train_loss: 0.720 train_accuracy: 0.673 test_accuracy: 0.739\n",
            "[1,   545] train_loss: 0.909 train_accuracy: 0.669 test_accuracy: 0.704\n",
            "[1,   550] train_loss: 0.849 train_accuracy: 0.658 test_accuracy: 0.660\n",
            "[1,   555] train_loss: 1.204 train_accuracy: 0.748 test_accuracy: 0.726\n",
            "[1,   560] train_loss: 0.990 train_accuracy: 0.775 test_accuracy: 0.749\n",
            "[1,   565] train_loss: 1.247 train_accuracy: 0.715 test_accuracy: 0.689\n",
            "[1,   570] train_loss: 0.617 train_accuracy: 0.800 test_accuracy: 0.788\n",
            "[1,   575] train_loss: 0.564 train_accuracy: 0.767 test_accuracy: 0.768\n",
            "[1,   580] train_loss: 0.506 train_accuracy: 0.699 test_accuracy: 0.728\n",
            "[1,   585] train_loss: 1.098 train_accuracy: 0.717 test_accuracy: 0.751\n",
            "[1,   590] train_loss: 1.129 train_accuracy: 0.711 test_accuracy: 0.716\n",
            "[1,   595] train_loss: 1.155 train_accuracy: 0.749 test_accuracy: 0.746\n",
            "[1,   600] train_loss: 1.629 train_accuracy: 0.711 test_accuracy: 0.722\n",
            "[1,   605] train_loss: 0.809 train_accuracy: 0.696 test_accuracy: 0.701\n",
            "[1,   610] train_loss: 1.120 train_accuracy: 0.682 test_accuracy: 0.648\n",
            "[1,   615] train_loss: 0.876 train_accuracy: 0.698 test_accuracy: 0.664\n",
            "[1,   620] train_loss: 1.220 train_accuracy: 0.710 test_accuracy: 0.726\n",
            "[1,   625] train_loss: 0.593 train_accuracy: 0.614 test_accuracy: 0.672\n",
            "[1,   630] train_loss: 1.417 train_accuracy: 0.749 test_accuracy: 0.797\n",
            "[1,   635] train_loss: 1.048 train_accuracy: 0.759 test_accuracy: 0.762\n",
            "[1,   640] train_loss: 0.891 train_accuracy: 0.736 test_accuracy: 0.708\n",
            "[1,   645] train_loss: 0.740 train_accuracy: 0.700 test_accuracy: 0.676\n",
            "[1,   650] train_loss: 0.518 train_accuracy: 0.702 test_accuracy: 0.722\n",
            "[1,   655] train_loss: 1.459 train_accuracy: 0.699 test_accuracy: 0.730\n",
            "[1,   660] train_loss: 0.949 train_accuracy: 0.687 test_accuracy: 0.696\n",
            "[1,   665] train_loss: 0.832 train_accuracy: 0.695 test_accuracy: 0.696\n",
            "[1,   670] train_loss: 0.764 train_accuracy: 0.703 test_accuracy: 0.742\n",
            "[1,   675] train_loss: 0.916 train_accuracy: 0.702 test_accuracy: 0.730\n",
            "[1,   680] train_loss: 1.305 train_accuracy: 0.662 test_accuracy: 0.668\n",
            "[1,   685] train_loss: 0.837 train_accuracy: 0.705 test_accuracy: 0.729\n",
            "[1,   690] train_loss: 0.916 train_accuracy: 0.601 test_accuracy: 0.637\n",
            "[1,   695] train_loss: 0.469 train_accuracy: 0.616 test_accuracy: 0.651\n",
            "[1,   700] train_loss: 1.626 train_accuracy: 0.760 test_accuracy: 0.777\n",
            "[1,   705] train_loss: 0.556 train_accuracy: 0.721 test_accuracy: 0.695\n",
            "[1,   710] train_loss: 0.687 train_accuracy: 0.680 test_accuracy: 0.658\n",
            "[1,   715] train_loss: 1.243 train_accuracy: 0.674 test_accuracy: 0.654\n",
            "[1,   720] train_loss: 0.883 train_accuracy: 0.741 test_accuracy: 0.746\n",
            "[1,   725] train_loss: 0.781 train_accuracy: 0.736 test_accuracy: 0.768\n",
            "[1,   730] train_loss: 0.851 train_accuracy: 0.695 test_accuracy: 0.716\n",
            "[1,   735] train_loss: 1.258 train_accuracy: 0.719 test_accuracy: 0.728\n",
            "[1,   740] train_loss: 1.050 train_accuracy: 0.670 test_accuracy: 0.646\n",
            "[1,   745] train_loss: 0.780 train_accuracy: 0.626 test_accuracy: 0.599\n",
            "[1,   750] train_loss: 0.849 train_accuracy: 0.656 test_accuracy: 0.683\n",
            "[1,   755] train_loss: 1.241 train_accuracy: 0.696 test_accuracy: 0.737\n",
            "[1,   760] train_loss: 0.448 train_accuracy: 0.689 test_accuracy: 0.729\n",
            "[1,   765] train_loss: 0.886 train_accuracy: 0.677 test_accuracy: 0.701\n",
            "[1,   770] train_loss: 0.713 train_accuracy: 0.636 test_accuracy: 0.667\n",
            "[1,   775] train_loss: 1.265 train_accuracy: 0.640 test_accuracy: 0.698\n",
            "[1,   780] train_loss: 0.799 train_accuracy: 0.598 test_accuracy: 0.647\n",
            "[1,   785] train_loss: 0.742 train_accuracy: 0.661 test_accuracy: 0.703\n",
            "[1,   790] train_loss: 0.775 train_accuracy: 0.699 test_accuracy: 0.711\n",
            "[1,   795] train_loss: 1.913 train_accuracy: 0.677 test_accuracy: 0.652\n",
            "[1,   800] train_loss: 1.351 train_accuracy: 0.655 test_accuracy: 0.634\n",
            "[1,   805] train_loss: 1.255 train_accuracy: 0.614 test_accuracy: 0.604\n",
            "[1,   810] train_loss: 0.827 train_accuracy: 0.601 test_accuracy: 0.588\n",
            "[1,   815] train_loss: 1.315 train_accuracy: 0.674 test_accuracy: 0.664\n",
            "[1,   820] train_loss: 0.698 train_accuracy: 0.667 test_accuracy: 0.663\n",
            "[1,   825] train_loss: 0.644 train_accuracy: 0.635 test_accuracy: 0.644\n",
            "[1,   830] train_loss: 0.844 train_accuracy: 0.671 test_accuracy: 0.657\n",
            "[1,   835] train_loss: 0.781 train_accuracy: 0.701 test_accuracy: 0.697\n",
            "[1,   840] train_loss: 0.745 train_accuracy: 0.723 test_accuracy: 0.730\n",
            "[1,   845] train_loss: 1.503 train_accuracy: 0.686 test_accuracy: 0.689\n",
            "[1,   850] train_loss: 1.098 train_accuracy: 0.671 test_accuracy: 0.700\n",
            "[1,   855] train_loss: 1.081 train_accuracy: 0.665 test_accuracy: 0.677\n",
            "[1,   860] train_loss: 0.858 train_accuracy: 0.624 test_accuracy: 0.623\n",
            "[1,   865] train_loss: 1.261 train_accuracy: 0.703 test_accuracy: 0.690\n",
            "[1,   870] train_loss: 0.867 train_accuracy: 0.663 test_accuracy: 0.645\n",
            "[1,   875] train_loss: 0.814 train_accuracy: 0.632 test_accuracy: 0.619\n",
            "[1,   880] train_loss: 1.007 train_accuracy: 0.687 test_accuracy: 0.675\n",
            "[1,   885] train_loss: 1.050 train_accuracy: 0.743 test_accuracy: 0.746\n",
            "[1,   890] train_loss: 0.452 train_accuracy: 0.713 test_accuracy: 0.720\n",
            "[1,   895] train_loss: 1.487 train_accuracy: 0.726 test_accuracy: 0.734\n",
            "[1,   900] train_loss: 0.709 train_accuracy: 0.740 test_accuracy: 0.738\n",
            "[1,   905] train_loss: 0.992 train_accuracy: 0.777 test_accuracy: 0.761\n",
            "[1,   910] train_loss: 0.980 train_accuracy: 0.703 test_accuracy: 0.722\n",
            "[1,   915] train_loss: 0.928 train_accuracy: 0.762 test_accuracy: 0.763\n",
            "[1,   920] train_loss: 0.424 train_accuracy: 0.775 test_accuracy: 0.750\n",
            "[1,   925] train_loss: 0.513 train_accuracy: 0.766 test_accuracy: 0.751\n",
            "[1,   930] train_loss: 0.528 train_accuracy: 0.742 test_accuracy: 0.760\n",
            "[1,   935] train_loss: 0.666 train_accuracy: 0.780 test_accuracy: 0.822\n",
            "[1,   940] train_loss: 0.853 train_accuracy: 0.770 test_accuracy: 0.786\n",
            "[1,   945] train_loss: 1.254 train_accuracy: 0.651 test_accuracy: 0.696\n",
            "[1,   950] train_loss: 1.758 train_accuracy: 0.750 test_accuracy: 0.744\n",
            "[1,   955] train_loss: 1.479 train_accuracy: 0.689 test_accuracy: 0.682\n",
            "[1,   960] train_loss: 0.822 train_accuracy: 0.564 test_accuracy: 0.567\n",
            "[1,   965] train_loss: 1.841 train_accuracy: 0.662 test_accuracy: 0.629\n",
            "[1,   970] train_loss: 1.194 train_accuracy: 0.691 test_accuracy: 0.660\n",
            "[1,   975] train_loss: 0.880 train_accuracy: 0.641 test_accuracy: 0.623\n",
            "[1,   980] train_loss: 1.025 train_accuracy: 0.637 test_accuracy: 0.651\n",
            "[1,   985] train_loss: 1.124 train_accuracy: 0.696 test_accuracy: 0.717\n",
            "[1,   990] train_loss: 1.322 train_accuracy: 0.706 test_accuracy: 0.734\n",
            "[1,   995] train_loss: 0.741 train_accuracy: 0.679 test_accuracy: 0.711\n",
            "[1,  1000] train_loss: 0.950 train_accuracy: 0.720 test_accuracy: 0.714\n",
            "[1,  1005] train_loss: 0.953 train_accuracy: 0.715 test_accuracy: 0.684\n",
            "[1,  1010] train_loss: 1.004 train_accuracy: 0.775 test_accuracy: 0.747\n",
            "[1,  1015] train_loss: 1.120 train_accuracy: 0.762 test_accuracy: 0.762\n",
            "[1,  1020] train_loss: 0.524 train_accuracy: 0.668 test_accuracy: 0.686\n",
            "[1,  1025] train_loss: 1.069 train_accuracy: 0.701 test_accuracy: 0.724\n",
            "[1,  1030] train_loss: 0.592 train_accuracy: 0.749 test_accuracy: 0.772\n",
            "[1,  1035] train_loss: 0.486 train_accuracy: 0.669 test_accuracy: 0.696\n",
            "[1,  1040] train_loss: 1.300 train_accuracy: 0.723 test_accuracy: 0.731\n",
            "[1,  1045] train_loss: 0.939 train_accuracy: 0.740 test_accuracy: 0.705\n",
            "[1,  1050] train_loss: 0.794 train_accuracy: 0.734 test_accuracy: 0.698\n",
            "[1,  1055] train_loss: 1.096 train_accuracy: 0.751 test_accuracy: 0.740\n",
            "[1,  1060] train_loss: 0.524 train_accuracy: 0.663 test_accuracy: 0.686\n",
            "[1,  1065] train_loss: 1.327 train_accuracy: 0.729 test_accuracy: 0.764\n",
            "[1,  1070] train_loss: 0.721 train_accuracy: 0.742 test_accuracy: 0.781\n",
            "[1,  1075] train_loss: 0.845 train_accuracy: 0.762 test_accuracy: 0.786\n",
            "[1,  1080] train_loss: 1.065 train_accuracy: 0.784 test_accuracy: 0.802\n",
            "[1,  1085] train_loss: 0.682 train_accuracy: 0.769 test_accuracy: 0.765\n",
            "[1,  1090] train_loss: 0.724 train_accuracy: 0.767 test_accuracy: 0.762\n",
            "[1,  1095] train_loss: 0.409 train_accuracy: 0.759 test_accuracy: 0.751\n",
            "[1,  1100] train_loss: 0.638 train_accuracy: 0.770 test_accuracy: 0.758\n",
            "[1,  1105] train_loss: 0.955 train_accuracy: 0.792 test_accuracy: 0.776\n",
            "[1,  1110] train_loss: 0.744 train_accuracy: 0.766 test_accuracy: 0.756\n",
            "[1,  1115] train_loss: 0.761 train_accuracy: 0.766 test_accuracy: 0.769\n",
            "[1,  1120] train_loss: 0.712 train_accuracy: 0.808 test_accuracy: 0.813\n",
            "[1,  1125] train_loss: 0.365 train_accuracy: 0.763 test_accuracy: 0.787\n",
            "[1,  1130] train_loss: 0.946 train_accuracy: 0.811 test_accuracy: 0.804\n",
            "[1,  1135] train_loss: 0.684 train_accuracy: 0.786 test_accuracy: 0.771\n",
            "[1,  1140] train_loss: 0.551 train_accuracy: 0.762 test_accuracy: 0.751\n",
            "[1,  1145] train_loss: 0.657 train_accuracy: 0.762 test_accuracy: 0.762\n",
            "[1,  1150] train_loss: 1.026 train_accuracy: 0.778 test_accuracy: 0.781\n",
            "[1,  1155] train_loss: 0.423 train_accuracy: 0.775 test_accuracy: 0.781\n",
            "[1,  1160] train_loss: 0.773 train_accuracy: 0.816 test_accuracy: 0.830\n",
            "[1,  1165] train_loss: 0.691 train_accuracy: 0.817 test_accuracy: 0.827\n",
            "[1,  1170] train_loss: 0.602 train_accuracy: 0.740 test_accuracy: 0.739\n",
            "[1,  1175] train_loss: 0.857 train_accuracy: 0.799 test_accuracy: 0.784\n",
            "[1,  1180] train_loss: 0.458 train_accuracy: 0.760 test_accuracy: 0.756\n",
            "[1,  1185] train_loss: 0.333 train_accuracy: 0.780 test_accuracy: 0.782\n",
            "[1,  1190] train_loss: 0.708 train_accuracy: 0.761 test_accuracy: 0.774\n",
            "[1,  1195] train_loss: 0.772 train_accuracy: 0.806 test_accuracy: 0.810\n",
            "[1,  1200] train_loss: 0.717 train_accuracy: 0.819 test_accuracy: 0.809\n",
            "[1,  1205] train_loss: 0.740 train_accuracy: 0.781 test_accuracy: 0.771\n",
            "[1,  1210] train_loss: 1.101 train_accuracy: 0.774 test_accuracy: 0.797\n",
            "[1,  1215] train_loss: 0.947 train_accuracy: 0.787 test_accuracy: 0.793\n",
            "[1,  1220] train_loss: 0.734 train_accuracy: 0.770 test_accuracy: 0.790\n",
            "[1,  1225] train_loss: 0.818 train_accuracy: 0.803 test_accuracy: 0.820\n",
            "[1,  1230] train_loss: 0.849 train_accuracy: 0.835 test_accuracy: 0.829\n",
            "[1,  1235] train_loss: 0.587 train_accuracy: 0.817 test_accuracy: 0.804\n",
            "[1,  1240] train_loss: 0.808 train_accuracy: 0.783 test_accuracy: 0.769\n",
            "[1,  1245] train_loss: 0.865 train_accuracy: 0.797 test_accuracy: 0.783\n",
            "[1,  1250] train_loss: 0.916 train_accuracy: 0.815 test_accuracy: 0.808\n",
            "[1,  1255] train_loss: 0.586 train_accuracy: 0.795 test_accuracy: 0.814\n",
            "[1,  1260] train_loss: 0.609 train_accuracy: 0.796 test_accuracy: 0.808\n",
            "[1,  1265] train_loss: 0.772 train_accuracy: 0.812 test_accuracy: 0.807\n",
            "[1,  1270] train_loss: 0.745 train_accuracy: 0.819 test_accuracy: 0.812\n",
            "[1,  1275] train_loss: 0.450 train_accuracy: 0.819 test_accuracy: 0.809\n",
            "[1,  1280] train_loss: 0.632 train_accuracy: 0.794 test_accuracy: 0.783\n",
            "[1,  1285] train_loss: 0.507 train_accuracy: 0.741 test_accuracy: 0.727\n",
            "[1,  1290] train_loss: 0.768 train_accuracy: 0.791 test_accuracy: 0.781\n",
            "[1,  1295] train_loss: 0.559 train_accuracy: 0.828 test_accuracy: 0.828\n",
            "[1,  1300] train_loss: 0.210 train_accuracy: 0.827 test_accuracy: 0.823\n",
            "[1,  1305] train_loss: 0.717 train_accuracy: 0.819 test_accuracy: 0.811\n",
            "[1,  1310] train_loss: 0.708 train_accuracy: 0.762 test_accuracy: 0.760\n",
            "[1,  1315] train_loss: 0.689 train_accuracy: 0.793 test_accuracy: 0.791\n",
            "[1,  1320] train_loss: 0.833 train_accuracy: 0.789 test_accuracy: 0.761\n",
            "[1,  1325] train_loss: 0.894 train_accuracy: 0.760 test_accuracy: 0.733\n",
            "[1,  1330] train_loss: 0.771 train_accuracy: 0.716 test_accuracy: 0.699\n",
            "[1,  1335] train_loss: 1.049 train_accuracy: 0.739 test_accuracy: 0.728\n",
            "[1,  1340] train_loss: 0.721 train_accuracy: 0.733 test_accuracy: 0.748\n",
            "[1,  1345] train_loss: 0.908 train_accuracy: 0.727 test_accuracy: 0.755\n",
            "[1,  1350] train_loss: 0.456 train_accuracy: 0.748 test_accuracy: 0.744\n",
            "[1,  1355] train_loss: 1.039 train_accuracy: 0.755 test_accuracy: 0.719\n",
            "[1,  1360] train_loss: 0.861 train_accuracy: 0.758 test_accuracy: 0.720\n",
            "[1,  1365] train_loss: 0.741 train_accuracy: 0.792 test_accuracy: 0.777\n",
            "[1,  1370] train_loss: 0.674 train_accuracy: 0.822 test_accuracy: 0.808\n",
            "[1,  1375] train_loss: 0.896 train_accuracy: 0.802 test_accuracy: 0.793\n",
            "[1,  1380] train_loss: 0.916 train_accuracy: 0.760 test_accuracy: 0.747\n",
            "[1,  1385] train_loss: 0.624 train_accuracy: 0.792 test_accuracy: 0.790\n",
            "[1,  1390] train_loss: 1.084 train_accuracy: 0.822 test_accuracy: 0.830\n",
            "[1,  1395] train_loss: 0.445 train_accuracy: 0.835 test_accuracy: 0.831\n",
            "[1,  1400] train_loss: 0.425 train_accuracy: 0.837 test_accuracy: 0.828\n",
            "[1,  1405] train_loss: 0.565 train_accuracy: 0.832 test_accuracy: 0.824\n",
            "[1,  1410] train_loss: 0.423 train_accuracy: 0.824 test_accuracy: 0.812\n",
            "[1,  1415] train_loss: 0.724 train_accuracy: 0.825 test_accuracy: 0.812\n",
            "[1,  1420] train_loss: 0.547 train_accuracy: 0.827 test_accuracy: 0.815\n",
            "[1,  1425] train_loss: 0.567 train_accuracy: 0.790 test_accuracy: 0.772\n",
            "[1,  1430] train_loss: 0.544 train_accuracy: 0.752 test_accuracy: 0.736\n",
            "[1,  1435] train_loss: 0.697 train_accuracy: 0.777 test_accuracy: 0.759\n",
            "[1,  1440] train_loss: 0.307 train_accuracy: 0.790 test_accuracy: 0.769\n",
            "[1,  1445] train_loss: 0.510 train_accuracy: 0.816 test_accuracy: 0.800\n",
            "[1,  1450] train_loss: 0.277 train_accuracy: 0.799 test_accuracy: 0.804\n",
            "[1,  1455] train_loss: 0.517 train_accuracy: 0.813 test_accuracy: 0.823\n",
            "[1,  1460] train_loss: 1.010 train_accuracy: 0.817 test_accuracy: 0.824\n",
            "[1,  1465] train_loss: 0.812 train_accuracy: 0.772 test_accuracy: 0.785\n",
            "[1,  1470] train_loss: 0.944 train_accuracy: 0.765 test_accuracy: 0.767\n",
            "[1,  1475] train_loss: 0.514 train_accuracy: 0.830 test_accuracy: 0.829\n",
            "[1,  1480] train_loss: 0.421 train_accuracy: 0.791 test_accuracy: 0.792\n",
            "[1,  1485] train_loss: 0.731 train_accuracy: 0.791 test_accuracy: 0.809\n",
            "[1,  1490] train_loss: 1.080 train_accuracy: 0.758 test_accuracy: 0.761\n",
            "[1,  1495] train_loss: 0.998 train_accuracy: 0.758 test_accuracy: 0.742\n",
            "[1,  1500] train_loss: 0.257 train_accuracy: 0.713 test_accuracy: 0.681\n",
            "[1,  1505] train_loss: 1.286 train_accuracy: 0.744 test_accuracy: 0.719\n",
            "[1,  1510] train_loss: 0.646 train_accuracy: 0.766 test_accuracy: 0.742\n",
            "[1,  1515] train_loss: 0.927 train_accuracy: 0.773 test_accuracy: 0.763\n",
            "[1,  1520] train_loss: 0.990 train_accuracy: 0.752 test_accuracy: 0.777\n",
            "[1,  1525] train_loss: 0.985 train_accuracy: 0.762 test_accuracy: 0.804\n",
            "[1,  1530] train_loss: 0.815 train_accuracy: 0.772 test_accuracy: 0.810\n",
            "[1,  1535] train_loss: 0.970 train_accuracy: 0.788 test_accuracy: 0.813\n",
            "[1,  1540] train_loss: 0.575 train_accuracy: 0.770 test_accuracy: 0.749\n",
            "[1,  1545] train_loss: 0.983 train_accuracy: 0.763 test_accuracy: 0.724\n",
            "[1,  1550] train_loss: 0.601 train_accuracy: 0.713 test_accuracy: 0.685\n",
            "[1,  1555] train_loss: 0.807 train_accuracy: 0.728 test_accuracy: 0.698\n",
            "[1,  1560] train_loss: 1.413 train_accuracy: 0.811 test_accuracy: 0.783\n",
            "[1,  1565] train_loss: 0.963 train_accuracy: 0.795 test_accuracy: 0.779\n",
            "[1,  1570] train_loss: 1.043 train_accuracy: 0.783 test_accuracy: 0.777\n",
            "[1,  1575] train_loss: 1.451 train_accuracy: 0.802 test_accuracy: 0.782\n",
            "[1,  1580] train_loss: 0.745 train_accuracy: 0.724 test_accuracy: 0.688\n",
            "[1,  1585] train_loss: 0.880 train_accuracy: 0.756 test_accuracy: 0.730\n",
            "[1,  1590] train_loss: 0.614 train_accuracy: 0.754 test_accuracy: 0.740\n",
            "[1,  1595] train_loss: 0.489 train_accuracy: 0.768 test_accuracy: 0.762\n",
            "[1,  1600] train_loss: 1.247 train_accuracy: 0.778 test_accuracy: 0.766\n",
            "[1,  1605] train_loss: 0.485 train_accuracy: 0.758 test_accuracy: 0.749\n",
            "[1,  1610] train_loss: 1.033 train_accuracy: 0.758 test_accuracy: 0.749\n",
            "[1,  1615] train_loss: 0.525 train_accuracy: 0.688 test_accuracy: 0.685\n",
            "[1,  1620] train_loss: 1.013 train_accuracy: 0.731 test_accuracy: 0.740\n",
            "[1,  1625] train_loss: 0.731 train_accuracy: 0.762 test_accuracy: 0.757\n",
            "[1,  1630] train_loss: 1.061 train_accuracy: 0.763 test_accuracy: 0.745\n",
            "[1,  1635] train_loss: 1.017 train_accuracy: 0.785 test_accuracy: 0.754\n",
            "[1,  1640] train_loss: 0.774 train_accuracy: 0.727 test_accuracy: 0.740\n",
            "[1,  1645] train_loss: 0.976 train_accuracy: 0.685 test_accuracy: 0.722\n",
            "[1,  1650] train_loss: 0.534 train_accuracy: 0.723 test_accuracy: 0.737\n",
            "[1,  1655] train_loss: 0.499 train_accuracy: 0.734 test_accuracy: 0.742\n",
            "[1,  1660] train_loss: 1.398 train_accuracy: 0.821 test_accuracy: 0.826\n",
            "[1,  1665] train_loss: 0.818 train_accuracy: 0.795 test_accuracy: 0.808\n",
            "[1,  1670] train_loss: 0.502 train_accuracy: 0.792 test_accuracy: 0.782\n",
            "[1,  1675] train_loss: 0.579 train_accuracy: 0.806 test_accuracy: 0.790\n",
            "[1,  1680] train_loss: 0.353 train_accuracy: 0.803 test_accuracy: 0.775\n",
            "[1,  1685] train_loss: 1.073 train_accuracy: 0.768 test_accuracy: 0.740\n",
            "[1,  1690] train_loss: 0.553 train_accuracy: 0.767 test_accuracy: 0.748\n",
            "[1,  1695] train_loss: 0.723 train_accuracy: 0.790 test_accuracy: 0.778\n",
            "[1,  1700] train_loss: 0.708 train_accuracy: 0.796 test_accuracy: 0.794\n",
            "[1,  1705] train_loss: 0.601 train_accuracy: 0.814 test_accuracy: 0.814\n",
            "[1,  1710] train_loss: 0.568 train_accuracy: 0.765 test_accuracy: 0.755\n",
            "[1,  1715] train_loss: 0.900 train_accuracy: 0.766 test_accuracy: 0.765\n",
            "[1,  1720] train_loss: 0.524 train_accuracy: 0.799 test_accuracy: 0.806\n",
            "[1,  1725] train_loss: 0.740 train_accuracy: 0.797 test_accuracy: 0.807\n",
            "[2,     5] train_loss: 0.656 train_accuracy: 0.777 test_accuracy: 0.801\n",
            "[2,    10] train_loss: 0.839 train_accuracy: 0.712 test_accuracy: 0.739\n",
            "[2,    15] train_loss: 0.654 train_accuracy: 0.759 test_accuracy: 0.767\n",
            "[2,    20] train_loss: 0.518 train_accuracy: 0.796 test_accuracy: 0.784\n",
            "[2,    25] train_loss: 0.907 train_accuracy: 0.776 test_accuracy: 0.771\n",
            "[2,    30] train_loss: 0.458 train_accuracy: 0.763 test_accuracy: 0.763\n",
            "[2,    35] train_loss: 0.677 train_accuracy: 0.816 test_accuracy: 0.807\n",
            "[2,    40] train_loss: 0.430 train_accuracy: 0.814 test_accuracy: 0.798\n",
            "[2,    45] train_loss: 0.440 train_accuracy: 0.791 test_accuracy: 0.766\n",
            "[2,    50] train_loss: 0.664 train_accuracy: 0.827 test_accuracy: 0.807\n",
            "[2,    55] train_loss: 0.394 train_accuracy: 0.817 test_accuracy: 0.810\n",
            "[2,    60] train_loss: 0.892 train_accuracy: 0.805 test_accuracy: 0.799\n",
            "[2,    65] train_loss: 0.456 train_accuracy: 0.821 test_accuracy: 0.809\n",
            "[2,    70] train_loss: 0.692 train_accuracy: 0.812 test_accuracy: 0.808\n",
            "[2,    75] train_loss: 0.653 train_accuracy: 0.788 test_accuracy: 0.786\n",
            "[2,    80] train_loss: 0.610 train_accuracy: 0.763 test_accuracy: 0.753\n",
            "[2,    85] train_loss: 1.194 train_accuracy: 0.792 test_accuracy: 0.758\n",
            "[2,    90] train_loss: 0.316 train_accuracy: 0.784 test_accuracy: 0.758\n",
            "[2,    95] train_loss: 0.752 train_accuracy: 0.796 test_accuracy: 0.763\n",
            "[2,   100] train_loss: 0.957 train_accuracy: 0.762 test_accuracy: 0.727\n",
            "[2,   105] train_loss: 1.099 train_accuracy: 0.811 test_accuracy: 0.788\n",
            "[2,   110] train_loss: 0.455 train_accuracy: 0.797 test_accuracy: 0.801\n",
            "[2,   115] train_loss: 1.122 train_accuracy: 0.773 test_accuracy: 0.790\n",
            "[2,   120] train_loss: 0.687 train_accuracy: 0.808 test_accuracy: 0.810\n",
            "[2,   125] train_loss: 0.632 train_accuracy: 0.782 test_accuracy: 0.779\n",
            "[2,   130] train_loss: 0.345 train_accuracy: 0.813 test_accuracy: 0.798\n",
            "[2,   135] train_loss: 0.488 train_accuracy: 0.818 test_accuracy: 0.805\n",
            "[2,   140] train_loss: 0.731 train_accuracy: 0.827 test_accuracy: 0.814\n",
            "[2,   145] train_loss: 0.411 train_accuracy: 0.806 test_accuracy: 0.789\n",
            "[2,   150] train_loss: 0.673 train_accuracy: 0.816 test_accuracy: 0.818\n",
            "[2,   155] train_loss: 0.454 train_accuracy: 0.818 test_accuracy: 0.831\n",
            "[2,   160] train_loss: 0.689 train_accuracy: 0.829 test_accuracy: 0.840\n",
            "[2,   165] train_loss: 0.408 train_accuracy: 0.792 test_accuracy: 0.803\n",
            "[2,   170] train_loss: 0.556 train_accuracy: 0.800 test_accuracy: 0.793\n",
            "[2,   175] train_loss: 0.969 train_accuracy: 0.819 test_accuracy: 0.818\n",
            "[2,   180] train_loss: 0.318 train_accuracy: 0.801 test_accuracy: 0.809\n",
            "[2,   185] train_loss: 0.685 train_accuracy: 0.788 test_accuracy: 0.808\n",
            "[2,   190] train_loss: 1.154 train_accuracy: 0.781 test_accuracy: 0.805\n",
            "[2,   195] train_loss: 0.976 train_accuracy: 0.813 test_accuracy: 0.812\n",
            "[2,   200] train_loss: 1.046 train_accuracy: 0.852 test_accuracy: 0.835\n",
            "[2,   205] train_loss: 0.989 train_accuracy: 0.809 test_accuracy: 0.778\n",
            "[2,   210] train_loss: 0.871 train_accuracy: 0.786 test_accuracy: 0.761\n",
            "[2,   215] train_loss: 0.403 train_accuracy: 0.782 test_accuracy: 0.770\n",
            "[2,   220] train_loss: 0.424 train_accuracy: 0.798 test_accuracy: 0.806\n",
            "[2,   225] train_loss: 0.514 train_accuracy: 0.790 test_accuracy: 0.804\n",
            "[2,   230] train_loss: 1.209 train_accuracy: 0.799 test_accuracy: 0.817\n",
            "[2,   235] train_loss: 0.572 train_accuracy: 0.817 test_accuracy: 0.815\n",
            "[2,   240] train_loss: 0.640 train_accuracy: 0.826 test_accuracy: 0.810\n",
            "[2,   245] train_loss: 0.349 train_accuracy: 0.834 test_accuracy: 0.819\n",
            "[2,   250] train_loss: 0.624 train_accuracy: 0.846 test_accuracy: 0.841\n",
            "[2,   255] train_loss: 0.533 train_accuracy: 0.849 test_accuracy: 0.850\n",
            "[2,   260] train_loss: 0.669 train_accuracy: 0.838 test_accuracy: 0.834\n",
            "[2,   265] train_loss: 0.635 train_accuracy: 0.834 test_accuracy: 0.827\n",
            "[2,   270] train_loss: 0.370 train_accuracy: 0.819 test_accuracy: 0.812\n",
            "[2,   275] train_loss: 0.551 train_accuracy: 0.807 test_accuracy: 0.789\n",
            "[2,   280] train_loss: 0.567 train_accuracy: 0.804 test_accuracy: 0.773\n",
            "[2,   285] train_loss: 0.289 train_accuracy: 0.793 test_accuracy: 0.770\n",
            "[2,   290] train_loss: 0.639 train_accuracy: 0.805 test_accuracy: 0.816\n",
            "[2,   295] train_loss: 0.741 train_accuracy: 0.778 test_accuracy: 0.803\n",
            "[2,   300] train_loss: 0.819 train_accuracy: 0.783 test_accuracy: 0.791\n",
            "[2,   305] train_loss: 0.767 train_accuracy: 0.794 test_accuracy: 0.787\n",
            "[2,   310] train_loss: 0.740 train_accuracy: 0.824 test_accuracy: 0.833\n",
            "[2,   315] train_loss: 0.385 train_accuracy: 0.833 test_accuracy: 0.828\n",
            "[2,   320] train_loss: 0.625 train_accuracy: 0.832 test_accuracy: 0.810\n",
            "[2,   325] train_loss: 1.106 train_accuracy: 0.787 test_accuracy: 0.756\n",
            "[2,   330] train_loss: 0.574 train_accuracy: 0.767 test_accuracy: 0.739\n",
            "[2,   335] train_loss: 0.434 train_accuracy: 0.755 test_accuracy: 0.724\n",
            "[2,   340] train_loss: 0.553 train_accuracy: 0.792 test_accuracy: 0.764\n",
            "[2,   345] train_loss: 0.632 train_accuracy: 0.825 test_accuracy: 0.812\n",
            "[2,   350] train_loss: 0.322 train_accuracy: 0.829 test_accuracy: 0.836\n",
            "[2,   355] train_loss: 0.377 train_accuracy: 0.827 test_accuracy: 0.832\n",
            "[2,   360] train_loss: 0.619 train_accuracy: 0.834 test_accuracy: 0.834\n",
            "[2,   365] train_loss: 0.312 train_accuracy: 0.839 test_accuracy: 0.832\n",
            "[2,   370] train_loss: 0.446 train_accuracy: 0.847 test_accuracy: 0.836\n",
            "[2,   375] train_loss: 1.355 train_accuracy: 0.829 test_accuracy: 0.824\n",
            "[2,   380] train_loss: 0.827 train_accuracy: 0.794 test_accuracy: 0.808\n",
            "[2,   385] train_loss: 0.779 train_accuracy: 0.759 test_accuracy: 0.761\n",
            "[2,   390] train_loss: 0.864 train_accuracy: 0.776 test_accuracy: 0.780\n",
            "[2,   395] train_loss: 0.357 train_accuracy: 0.776 test_accuracy: 0.789\n",
            "[2,   400] train_loss: 0.343 train_accuracy: 0.761 test_accuracy: 0.776\n",
            "[2,   405] train_loss: 0.706 train_accuracy: 0.772 test_accuracy: 0.773\n",
            "[2,   410] train_loss: 0.650 train_accuracy: 0.812 test_accuracy: 0.801\n",
            "[2,   415] train_loss: 0.735 train_accuracy: 0.813 test_accuracy: 0.784\n",
            "[2,   420] train_loss: 1.054 train_accuracy: 0.844 test_accuracy: 0.830\n",
            "[2,   425] train_loss: 0.506 train_accuracy: 0.844 test_accuracy: 0.837\n",
            "[2,   430] train_loss: 0.443 train_accuracy: 0.806 test_accuracy: 0.789\n",
            "[2,   435] train_loss: 0.781 train_accuracy: 0.808 test_accuracy: 0.782\n",
            "[2,   440] train_loss: 0.482 train_accuracy: 0.799 test_accuracy: 0.774\n",
            "[2,   445] train_loss: 0.400 train_accuracy: 0.805 test_accuracy: 0.791\n",
            "[2,   450] train_loss: 0.560 train_accuracy: 0.810 test_accuracy: 0.808\n",
            "[2,   455] train_loss: 0.743 train_accuracy: 0.823 test_accuracy: 0.825\n",
            "[2,   460] train_loss: 0.693 train_accuracy: 0.816 test_accuracy: 0.811\n",
            "[2,   465] train_loss: 0.495 train_accuracy: 0.810 test_accuracy: 0.791\n",
            "[2,   470] train_loss: 0.336 train_accuracy: 0.784 test_accuracy: 0.763\n",
            "[2,   475] train_loss: 0.489 train_accuracy: 0.815 test_accuracy: 0.799\n",
            "[2,   480] train_loss: 0.326 train_accuracy: 0.791 test_accuracy: 0.794\n",
            "[2,   485] train_loss: 0.586 train_accuracy: 0.800 test_accuracy: 0.785\n",
            "[2,   490] train_loss: 0.482 train_accuracy: 0.807 test_accuracy: 0.784\n",
            "[2,   495] train_loss: 0.326 train_accuracy: 0.822 test_accuracy: 0.808\n",
            "[2,   500] train_loss: 0.428 train_accuracy: 0.819 test_accuracy: 0.801\n",
            "[2,   505] train_loss: 0.693 train_accuracy: 0.823 test_accuracy: 0.819\n",
            "[2,   510] train_loss: 0.331 train_accuracy: 0.788 test_accuracy: 0.803\n",
            "[2,   515] train_loss: 0.632 train_accuracy: 0.786 test_accuracy: 0.804\n",
            "[2,   520] train_loss: 0.902 train_accuracy: 0.829 test_accuracy: 0.815\n",
            "[2,   525] train_loss: 0.332 train_accuracy: 0.819 test_accuracy: 0.779\n",
            "[2,   530] train_loss: 0.509 train_accuracy: 0.840 test_accuracy: 0.808\n",
            "[2,   535] train_loss: 1.501 train_accuracy: 0.830 test_accuracy: 0.807\n",
            "[2,   540] train_loss: 0.563 train_accuracy: 0.822 test_accuracy: 0.805\n",
            "[2,   545] train_loss: 0.296 train_accuracy: 0.832 test_accuracy: 0.822\n",
            "[2,   550] train_loss: 0.480 train_accuracy: 0.837 test_accuracy: 0.833\n",
            "[2,   555] train_loss: 0.203 train_accuracy: 0.807 test_accuracy: 0.810\n",
            "[2,   560] train_loss: 0.689 train_accuracy: 0.819 test_accuracy: 0.814\n",
            "[2,   565] train_loss: 0.522 train_accuracy: 0.850 test_accuracy: 0.828\n",
            "[2,   570] train_loss: 0.346 train_accuracy: 0.782 test_accuracy: 0.759\n",
            "[2,   575] train_loss: 0.789 train_accuracy: 0.805 test_accuracy: 0.773\n",
            "[2,   580] train_loss: 0.698 train_accuracy: 0.793 test_accuracy: 0.763\n",
            "[2,   585] train_loss: 1.030 train_accuracy: 0.818 test_accuracy: 0.787\n",
            "[2,   590] train_loss: 1.201 train_accuracy: 0.817 test_accuracy: 0.810\n",
            "[2,   595] train_loss: 0.789 train_accuracy: 0.791 test_accuracy: 0.802\n",
            "[2,   600] train_loss: 0.429 train_accuracy: 0.785 test_accuracy: 0.803\n",
            "[2,   605] train_loss: 0.429 train_accuracy: 0.792 test_accuracy: 0.801\n",
            "[2,   610] train_loss: 0.967 train_accuracy: 0.787 test_accuracy: 0.791\n",
            "[2,   615] train_loss: 0.542 train_accuracy: 0.813 test_accuracy: 0.817\n",
            "[2,   620] train_loss: 0.893 train_accuracy: 0.817 test_accuracy: 0.824\n",
            "[2,   625] train_loss: 0.551 train_accuracy: 0.821 test_accuracy: 0.814\n",
            "[2,   630] train_loss: 0.695 train_accuracy: 0.818 test_accuracy: 0.792\n",
            "[2,   635] train_loss: 0.683 train_accuracy: 0.801 test_accuracy: 0.779\n",
            "[2,   640] train_loss: 0.667 train_accuracy: 0.805 test_accuracy: 0.795\n",
            "[2,   645] train_loss: 0.590 train_accuracy: 0.795 test_accuracy: 0.785\n",
            "[2,   650] train_loss: 0.280 train_accuracy: 0.808 test_accuracy: 0.786\n",
            "[2,   655] train_loss: 0.450 train_accuracy: 0.797 test_accuracy: 0.769\n",
            "[2,   660] train_loss: 0.548 train_accuracy: 0.779 test_accuracy: 0.757\n",
            "[2,   665] train_loss: 0.685 train_accuracy: 0.741 test_accuracy: 0.721\n",
            "[2,   670] train_loss: 1.011 train_accuracy: 0.755 test_accuracy: 0.729\n",
            "[2,   675] train_loss: 1.086 train_accuracy: 0.778 test_accuracy: 0.749\n",
            "[2,   680] train_loss: 0.855 train_accuracy: 0.819 test_accuracy: 0.800\n",
            "[2,   685] train_loss: 0.560 train_accuracy: 0.826 test_accuracy: 0.802\n",
            "[2,   690] train_loss: 0.346 train_accuracy: 0.748 test_accuracy: 0.725\n",
            "[2,   695] train_loss: 0.952 train_accuracy: 0.702 test_accuracy: 0.696\n",
            "[2,   700] train_loss: 0.919 train_accuracy: 0.780 test_accuracy: 0.783\n",
            "[2,   705] train_loss: 0.772 train_accuracy: 0.809 test_accuracy: 0.805\n",
            "[2,   710] train_loss: 1.131 train_accuracy: 0.821 test_accuracy: 0.800\n",
            "[2,   715] train_loss: 0.458 train_accuracy: 0.784 test_accuracy: 0.768\n",
            "[2,   720] train_loss: 0.759 train_accuracy: 0.793 test_accuracy: 0.776\n",
            "[2,   725] train_loss: 0.710 train_accuracy: 0.807 test_accuracy: 0.788\n",
            "[2,   730] train_loss: 0.327 train_accuracy: 0.825 test_accuracy: 0.814\n",
            "[2,   735] train_loss: 0.466 train_accuracy: 0.854 test_accuracy: 0.846\n",
            "[2,   740] train_loss: 0.905 train_accuracy: 0.862 test_accuracy: 0.849\n",
            "[2,   745] train_loss: 0.790 train_accuracy: 0.817 test_accuracy: 0.796\n",
            "[2,   750] train_loss: 0.598 train_accuracy: 0.804 test_accuracy: 0.797\n",
            "[2,   755] train_loss: 0.711 train_accuracy: 0.803 test_accuracy: 0.814\n",
            "[2,   760] train_loss: 0.393 train_accuracy: 0.844 test_accuracy: 0.833\n",
            "[2,   765] train_loss: 0.508 train_accuracy: 0.834 test_accuracy: 0.819\n",
            "[2,   770] train_loss: 0.555 train_accuracy: 0.805 test_accuracy: 0.784\n",
            "[2,   775] train_loss: 0.369 train_accuracy: 0.784 test_accuracy: 0.761\n",
            "[2,   780] train_loss: 0.743 train_accuracy: 0.795 test_accuracy: 0.776\n",
            "[2,   785] train_loss: 0.619 train_accuracy: 0.812 test_accuracy: 0.803\n",
            "[2,   790] train_loss: 0.915 train_accuracy: 0.785 test_accuracy: 0.791\n",
            "[2,   795] train_loss: 0.918 train_accuracy: 0.765 test_accuracy: 0.761\n",
            "[2,   800] train_loss: 0.437 train_accuracy: 0.791 test_accuracy: 0.788\n",
            "[2,   805] train_loss: 0.709 train_accuracy: 0.757 test_accuracy: 0.778\n",
            "[2,   810] train_loss: 0.328 train_accuracy: 0.772 test_accuracy: 0.796\n",
            "[2,   815] train_loss: 0.963 train_accuracy: 0.806 test_accuracy: 0.800\n",
            "[2,   820] train_loss: 0.539 train_accuracy: 0.789 test_accuracy: 0.774\n",
            "[2,   825] train_loss: 1.024 train_accuracy: 0.806 test_accuracy: 0.787\n",
            "[2,   830] train_loss: 0.806 train_accuracy: 0.784 test_accuracy: 0.787\n",
            "[2,   835] train_loss: 0.432 train_accuracy: 0.777 test_accuracy: 0.782\n",
            "[2,   840] train_loss: 0.738 train_accuracy: 0.787 test_accuracy: 0.776\n",
            "[2,   845] train_loss: 0.783 train_accuracy: 0.809 test_accuracy: 0.789\n",
            "[2,   850] train_loss: 0.679 train_accuracy: 0.828 test_accuracy: 0.805\n",
            "[2,   855] train_loss: 0.469 train_accuracy: 0.815 test_accuracy: 0.812\n",
            "[2,   860] train_loss: 0.516 train_accuracy: 0.796 test_accuracy: 0.801\n",
            "[2,   865] train_loss: 0.261 train_accuracy: 0.788 test_accuracy: 0.762\n",
            "[2,   870] train_loss: 0.719 train_accuracy: 0.823 test_accuracy: 0.803\n",
            "[2,   875] train_loss: 1.082 train_accuracy: 0.812 test_accuracy: 0.803\n",
            "[2,   880] train_loss: 0.901 train_accuracy: 0.815 test_accuracy: 0.802\n",
            "[2,   885] train_loss: 1.160 train_accuracy: 0.840 test_accuracy: 0.824\n",
            "[2,   890] train_loss: 0.299 train_accuracy: 0.790 test_accuracy: 0.778\n",
            "[2,   895] train_loss: 0.441 train_accuracy: 0.783 test_accuracy: 0.772\n",
            "[2,   900] train_loss: 0.832 train_accuracy: 0.772 test_accuracy: 0.765\n",
            "[2,   905] train_loss: 0.663 train_accuracy: 0.758 test_accuracy: 0.768\n",
            "[2,   910] train_loss: 1.751 train_accuracy: 0.810 test_accuracy: 0.810\n",
            "[2,   915] train_loss: 0.372 train_accuracy: 0.775 test_accuracy: 0.743\n",
            "[2,   920] train_loss: 1.104 train_accuracy: 0.781 test_accuracy: 0.755\n",
            "[2,   925] train_loss: 0.587 train_accuracy: 0.814 test_accuracy: 0.790\n",
            "[2,   930] train_loss: 0.509 train_accuracy: 0.813 test_accuracy: 0.799\n",
            "[2,   935] train_loss: 0.706 train_accuracy: 0.810 test_accuracy: 0.796\n",
            "[2,   940] train_loss: 0.758 train_accuracy: 0.829 test_accuracy: 0.815\n",
            "[2,   945] train_loss: 0.323 train_accuracy: 0.824 test_accuracy: 0.808\n",
            "[2,   950] train_loss: 0.497 train_accuracy: 0.818 test_accuracy: 0.808\n",
            "[2,   955] train_loss: 0.484 train_accuracy: 0.799 test_accuracy: 0.783\n",
            "[2,   960] train_loss: 0.538 train_accuracy: 0.826 test_accuracy: 0.801\n",
            "[2,   965] train_loss: 0.610 train_accuracy: 0.818 test_accuracy: 0.787\n",
            "[2,   970] train_loss: 1.023 train_accuracy: 0.802 test_accuracy: 0.768\n",
            "[2,   975] train_loss: 0.874 train_accuracy: 0.781 test_accuracy: 0.755\n",
            "[2,   980] train_loss: 0.538 train_accuracy: 0.827 test_accuracy: 0.800\n",
            "[2,   985] train_loss: 0.546 train_accuracy: 0.852 test_accuracy: 0.827\n",
            "[2,   990] train_loss: 0.692 train_accuracy: 0.862 test_accuracy: 0.839\n",
            "[2,   995] train_loss: 0.700 train_accuracy: 0.851 test_accuracy: 0.841\n",
            "[2,  1000] train_loss: 0.689 train_accuracy: 0.819 test_accuracy: 0.824\n",
            "[2,  1005] train_loss: 0.511 train_accuracy: 0.816 test_accuracy: 0.832\n",
            "[2,  1010] train_loss: 1.099 train_accuracy: 0.789 test_accuracy: 0.807\n",
            "[2,  1015] train_loss: 0.469 train_accuracy: 0.714 test_accuracy: 0.738\n",
            "[2,  1020] train_loss: 0.751 train_accuracy: 0.688 test_accuracy: 0.716\n",
            "[2,  1025] train_loss: 0.745 train_accuracy: 0.724 test_accuracy: 0.720\n",
            "[2,  1030] train_loss: 0.644 train_accuracy: 0.798 test_accuracy: 0.784\n",
            "[2,  1035] train_loss: 0.607 train_accuracy: 0.833 test_accuracy: 0.823\n",
            "[2,  1040] train_loss: 0.407 train_accuracy: 0.856 test_accuracy: 0.848\n",
            "[2,  1045] train_loss: 0.424 train_accuracy: 0.862 test_accuracy: 0.860\n",
            "[2,  1050] train_loss: 0.318 train_accuracy: 0.850 test_accuracy: 0.844\n",
            "[2,  1055] train_loss: 0.435 train_accuracy: 0.850 test_accuracy: 0.831\n",
            "[2,  1060] train_loss: 0.664 train_accuracy: 0.849 test_accuracy: 0.814\n",
            "[2,  1065] train_loss: 0.449 train_accuracy: 0.834 test_accuracy: 0.809\n",
            "[2,  1070] train_loss: 0.315 train_accuracy: 0.821 test_accuracy: 0.794\n",
            "[2,  1075] train_loss: 0.769 train_accuracy: 0.843 test_accuracy: 0.828\n",
            "[2,  1080] train_loss: 0.233 train_accuracy: 0.838 test_accuracy: 0.835\n",
            "[2,  1085] train_loss: 0.475 train_accuracy: 0.857 test_accuracy: 0.841\n",
            "[2,  1090] train_loss: 0.342 train_accuracy: 0.862 test_accuracy: 0.844\n",
            "[2,  1095] train_loss: 0.698 train_accuracy: 0.850 test_accuracy: 0.833\n",
            "[2,  1100] train_loss: 0.641 train_accuracy: 0.811 test_accuracy: 0.809\n",
            "[2,  1105] train_loss: 0.666 train_accuracy: 0.777 test_accuracy: 0.799\n",
            "[2,  1110] train_loss: 0.319 train_accuracy: 0.779 test_accuracy: 0.797\n",
            "[2,  1115] train_loss: 1.035 train_accuracy: 0.842 test_accuracy: 0.831\n",
            "[2,  1120] train_loss: 0.445 train_accuracy: 0.854 test_accuracy: 0.837\n",
            "[2,  1125] train_loss: 0.316 train_accuracy: 0.865 test_accuracy: 0.847\n",
            "[2,  1130] train_loss: 0.438 train_accuracy: 0.857 test_accuracy: 0.846\n",
            "[2,  1135] train_loss: 0.677 train_accuracy: 0.838 test_accuracy: 0.842\n",
            "[2,  1140] train_loss: 0.650 train_accuracy: 0.837 test_accuracy: 0.837\n",
            "[2,  1145] train_loss: 1.170 train_accuracy: 0.843 test_accuracy: 0.826\n",
            "[2,  1150] train_loss: 0.396 train_accuracy: 0.840 test_accuracy: 0.816\n",
            "[2,  1155] train_loss: 0.392 train_accuracy: 0.838 test_accuracy: 0.802\n",
            "[2,  1160] train_loss: 0.244 train_accuracy: 0.832 test_accuracy: 0.795\n",
            "[2,  1165] train_loss: 0.774 train_accuracy: 0.834 test_accuracy: 0.795\n",
            "[2,  1170] train_loss: 0.941 train_accuracy: 0.846 test_accuracy: 0.814\n",
            "[2,  1175] train_loss: 0.609 train_accuracy: 0.851 test_accuracy: 0.815\n",
            "[2,  1180] train_loss: 0.649 train_accuracy: 0.852 test_accuracy: 0.834\n",
            "[2,  1185] train_loss: 0.360 train_accuracy: 0.847 test_accuracy: 0.841\n",
            "[2,  1190] train_loss: 0.238 train_accuracy: 0.823 test_accuracy: 0.830\n",
            "[2,  1195] train_loss: 0.409 train_accuracy: 0.798 test_accuracy: 0.817\n",
            "[2,  1200] train_loss: 0.475 train_accuracy: 0.813 test_accuracy: 0.833\n",
            "[2,  1205] train_loss: 0.693 train_accuracy: 0.832 test_accuracy: 0.838\n",
            "[2,  1210] train_loss: 0.353 train_accuracy: 0.786 test_accuracy: 0.786\n",
            "[2,  1215] train_loss: 0.492 train_accuracy: 0.808 test_accuracy: 0.796\n",
            "[2,  1220] train_loss: 1.488 train_accuracy: 0.848 test_accuracy: 0.826\n",
            "[2,  1225] train_loss: 0.599 train_accuracy: 0.863 test_accuracy: 0.843\n",
            "[2,  1230] train_loss: 0.523 train_accuracy: 0.861 test_accuracy: 0.837\n",
            "[2,  1235] train_loss: 0.698 train_accuracy: 0.847 test_accuracy: 0.830\n",
            "[2,  1240] train_loss: 0.386 train_accuracy: 0.821 test_accuracy: 0.802\n",
            "[2,  1245] train_loss: 0.558 train_accuracy: 0.828 test_accuracy: 0.807\n",
            "[2,  1250] train_loss: 0.652 train_accuracy: 0.821 test_accuracy: 0.800\n",
            "[2,  1255] train_loss: 0.714 train_accuracy: 0.827 test_accuracy: 0.802\n",
            "[2,  1260] train_loss: 0.753 train_accuracy: 0.817 test_accuracy: 0.799\n",
            "[2,  1265] train_loss: 0.540 train_accuracy: 0.802 test_accuracy: 0.790\n",
            "[2,  1270] train_loss: 0.421 train_accuracy: 0.806 test_accuracy: 0.796\n",
            "[2,  1275] train_loss: 0.729 train_accuracy: 0.805 test_accuracy: 0.796\n",
            "[2,  1280] train_loss: 0.428 train_accuracy: 0.802 test_accuracy: 0.783\n",
            "[2,  1285] train_loss: 0.486 train_accuracy: 0.805 test_accuracy: 0.787\n",
            "[2,  1290] train_loss: 0.451 train_accuracy: 0.819 test_accuracy: 0.811\n",
            "[2,  1295] train_loss: 0.308 train_accuracy: 0.820 test_accuracy: 0.821\n",
            "[2,  1300] train_loss: 0.678 train_accuracy: 0.817 test_accuracy: 0.828\n",
            "[2,  1305] train_loss: 0.585 train_accuracy: 0.800 test_accuracy: 0.796\n",
            "[2,  1310] train_loss: 1.186 train_accuracy: 0.830 test_accuracy: 0.827\n",
            "[2,  1315] train_loss: 0.564 train_accuracy: 0.868 test_accuracy: 0.851\n",
            "[2,  1320] train_loss: 0.549 train_accuracy: 0.856 test_accuracy: 0.840\n",
            "[2,  1325] train_loss: 0.779 train_accuracy: 0.828 test_accuracy: 0.817\n",
            "[2,  1330] train_loss: 0.402 train_accuracy: 0.844 test_accuracy: 0.845\n",
            "[2,  1335] train_loss: 0.414 train_accuracy: 0.855 test_accuracy: 0.845\n",
            "[2,  1340] train_loss: 0.671 train_accuracy: 0.844 test_accuracy: 0.812\n",
            "[2,  1345] train_loss: 0.556 train_accuracy: 0.835 test_accuracy: 0.804\n",
            "[2,  1350] train_loss: 0.411 train_accuracy: 0.832 test_accuracy: 0.806\n",
            "[2,  1355] train_loss: 0.240 train_accuracy: 0.835 test_accuracy: 0.807\n",
            "[2,  1360] train_loss: 0.472 train_accuracy: 0.837 test_accuracy: 0.809\n",
            "[2,  1365] train_loss: 0.953 train_accuracy: 0.823 test_accuracy: 0.796\n",
            "[2,  1370] train_loss: 0.726 train_accuracy: 0.808 test_accuracy: 0.799\n",
            "[2,  1375] train_loss: 0.455 train_accuracy: 0.812 test_accuracy: 0.813\n",
            "[2,  1380] train_loss: 0.872 train_accuracy: 0.830 test_accuracy: 0.835\n",
            "[2,  1385] train_loss: 0.271 train_accuracy: 0.841 test_accuracy: 0.854\n",
            "[2,  1390] train_loss: 0.617 train_accuracy: 0.854 test_accuracy: 0.861\n",
            "[2,  1395] train_loss: 0.436 train_accuracy: 0.808 test_accuracy: 0.808\n",
            "[2,  1400] train_loss: 0.588 train_accuracy: 0.774 test_accuracy: 0.778\n",
            "[2,  1405] train_loss: 0.367 train_accuracy: 0.787 test_accuracy: 0.805\n",
            "[2,  1410] train_loss: 0.606 train_accuracy: 0.802 test_accuracy: 0.818\n",
            "[2,  1415] train_loss: 0.693 train_accuracy: 0.845 test_accuracy: 0.856\n",
            "[2,  1420] train_loss: 0.285 train_accuracy: 0.829 test_accuracy: 0.836\n",
            "[2,  1425] train_loss: 0.819 train_accuracy: 0.810 test_accuracy: 0.820\n",
            "[2,  1430] train_loss: 0.571 train_accuracy: 0.799 test_accuracy: 0.786\n",
            "[2,  1435] train_loss: 0.397 train_accuracy: 0.783 test_accuracy: 0.749\n",
            "[2,  1440] train_loss: 0.462 train_accuracy: 0.780 test_accuracy: 0.745\n",
            "[2,  1445] train_loss: 1.092 train_accuracy: 0.823 test_accuracy: 0.790\n",
            "[2,  1450] train_loss: 0.797 train_accuracy: 0.816 test_accuracy: 0.804\n",
            "[2,  1455] train_loss: 0.618 train_accuracy: 0.790 test_accuracy: 0.787\n",
            "[2,  1460] train_loss: 0.182 train_accuracy: 0.755 test_accuracy: 0.769\n",
            "[2,  1465] train_loss: 1.121 train_accuracy: 0.807 test_accuracy: 0.812\n",
            "[2,  1470] train_loss: 0.696 train_accuracy: 0.800 test_accuracy: 0.787\n",
            "[2,  1475] train_loss: 0.497 train_accuracy: 0.844 test_accuracy: 0.818\n",
            "[2,  1480] train_loss: 0.692 train_accuracy: 0.810 test_accuracy: 0.780\n",
            "[2,  1485] train_loss: 0.814 train_accuracy: 0.821 test_accuracy: 0.798\n",
            "[2,  1490] train_loss: 0.208 train_accuracy: 0.839 test_accuracy: 0.828\n",
            "[2,  1495] train_loss: 0.235 train_accuracy: 0.847 test_accuracy: 0.828\n",
            "[2,  1500] train_loss: 0.844 train_accuracy: 0.835 test_accuracy: 0.816\n",
            "[2,  1505] train_loss: 0.894 train_accuracy: 0.799 test_accuracy: 0.800\n",
            "[2,  1510] train_loss: 1.029 train_accuracy: 0.788 test_accuracy: 0.777\n",
            "[2,  1515] train_loss: 0.653 train_accuracy: 0.794 test_accuracy: 0.765\n",
            "[2,  1520] train_loss: 0.792 train_accuracy: 0.812 test_accuracy: 0.785\n",
            "[2,  1525] train_loss: 0.888 train_accuracy: 0.807 test_accuracy: 0.794\n",
            "[2,  1530] train_loss: 0.310 train_accuracy: 0.817 test_accuracy: 0.802\n",
            "[2,  1535] train_loss: 0.585 train_accuracy: 0.821 test_accuracy: 0.815\n",
            "[2,  1540] train_loss: 0.566 train_accuracy: 0.815 test_accuracy: 0.802\n",
            "[2,  1545] train_loss: 0.314 train_accuracy: 0.821 test_accuracy: 0.808\n",
            "[2,  1550] train_loss: 0.713 train_accuracy: 0.831 test_accuracy: 0.816\n",
            "[2,  1555] train_loss: 0.883 train_accuracy: 0.834 test_accuracy: 0.813\n",
            "[2,  1560] train_loss: 0.579 train_accuracy: 0.826 test_accuracy: 0.821\n",
            "[2,  1565] train_loss: 0.330 train_accuracy: 0.838 test_accuracy: 0.823\n",
            "[2,  1570] train_loss: 0.376 train_accuracy: 0.826 test_accuracy: 0.803\n",
            "[2,  1575] train_loss: 0.530 train_accuracy: 0.805 test_accuracy: 0.772\n",
            "[2,  1580] train_loss: 1.212 train_accuracy: 0.804 test_accuracy: 0.774\n",
            "[2,  1585] train_loss: 0.950 train_accuracy: 0.800 test_accuracy: 0.781\n",
            "[2,  1590] train_loss: 1.000 train_accuracy: 0.796 test_accuracy: 0.776\n",
            "[2,  1595] train_loss: 0.618 train_accuracy: 0.793 test_accuracy: 0.765\n",
            "[2,  1600] train_loss: 0.564 train_accuracy: 0.785 test_accuracy: 0.754\n",
            "[2,  1605] train_loss: 0.901 train_accuracy: 0.800 test_accuracy: 0.772\n",
            "[2,  1610] train_loss: 1.033 train_accuracy: 0.832 test_accuracy: 0.830\n",
            "[2,  1615] train_loss: 0.770 train_accuracy: 0.790 test_accuracy: 0.776\n",
            "[2,  1620] train_loss: 0.330 train_accuracy: 0.794 test_accuracy: 0.777\n",
            "[2,  1625] train_loss: 0.496 train_accuracy: 0.808 test_accuracy: 0.797\n",
            "[2,  1630] train_loss: 0.374 train_accuracy: 0.839 test_accuracy: 0.829\n",
            "[2,  1635] train_loss: 0.349 train_accuracy: 0.853 test_accuracy: 0.846\n",
            "[2,  1640] train_loss: 0.448 train_accuracy: 0.838 test_accuracy: 0.838\n",
            "[2,  1645] train_loss: 0.362 train_accuracy: 0.841 test_accuracy: 0.822\n",
            "[2,  1650] train_loss: 0.422 train_accuracy: 0.832 test_accuracy: 0.807\n",
            "[2,  1655] train_loss: 0.425 train_accuracy: 0.850 test_accuracy: 0.835\n",
            "[2,  1660] train_loss: 0.731 train_accuracy: 0.851 test_accuracy: 0.833\n",
            "[2,  1665] train_loss: 0.318 train_accuracy: 0.839 test_accuracy: 0.819\n",
            "[2,  1670] train_loss: 0.381 train_accuracy: 0.840 test_accuracy: 0.822\n",
            "[2,  1675] train_loss: 0.657 train_accuracy: 0.831 test_accuracy: 0.821\n",
            "[2,  1680] train_loss: 0.484 train_accuracy: 0.787 test_accuracy: 0.781\n",
            "[2,  1685] train_loss: 0.642 train_accuracy: 0.823 test_accuracy: 0.814\n",
            "[2,  1690] train_loss: 0.916 train_accuracy: 0.826 test_accuracy: 0.798\n",
            "[2,  1695] train_loss: 0.551 train_accuracy: 0.830 test_accuracy: 0.813\n",
            "[2,  1700] train_loss: 0.600 train_accuracy: 0.804 test_accuracy: 0.800\n",
            "[2,  1705] train_loss: 0.572 train_accuracy: 0.790 test_accuracy: 0.777\n",
            "[2,  1710] train_loss: 1.556 train_accuracy: 0.820 test_accuracy: 0.808\n",
            "[2,  1715] train_loss: 0.728 train_accuracy: 0.716 test_accuracy: 0.701\n",
            "[2,  1720] train_loss: 0.849 train_accuracy: 0.754 test_accuracy: 0.735\n",
            "[2,  1725] train_loss: 0.215 train_accuracy: 0.816 test_accuracy: 0.808\n",
            "[3,     5] train_loss: 1.038 train_accuracy: 0.837 test_accuracy: 0.825\n",
            "[3,    10] train_loss: 1.104 train_accuracy: 0.822 test_accuracy: 0.814\n",
            "[3,    15] train_loss: 0.318 train_accuracy: 0.808 test_accuracy: 0.814\n",
            "[3,    20] train_loss: 0.509 train_accuracy: 0.796 test_accuracy: 0.789\n",
            "[3,    25] train_loss: 0.633 train_accuracy: 0.843 test_accuracy: 0.815\n",
            "[3,    30] train_loss: 0.619 train_accuracy: 0.850 test_accuracy: 0.825\n",
            "[3,    35] train_loss: 0.844 train_accuracy: 0.830 test_accuracy: 0.791\n",
            "[3,    40] train_loss: 0.676 train_accuracy: 0.837 test_accuracy: 0.798\n",
            "[3,    45] train_loss: 0.161 train_accuracy: 0.855 test_accuracy: 0.833\n",
            "[3,    50] train_loss: 0.658 train_accuracy: 0.833 test_accuracy: 0.829\n",
            "[3,    55] train_loss: 0.452 train_accuracy: 0.801 test_accuracy: 0.797\n",
            "[3,    60] train_loss: 1.081 train_accuracy: 0.838 test_accuracy: 0.816\n",
            "[3,    65] train_loss: 0.813 train_accuracy: 0.866 test_accuracy: 0.849\n",
            "[3,    70] train_loss: 0.190 train_accuracy: 0.842 test_accuracy: 0.848\n",
            "[3,    75] train_loss: 0.546 train_accuracy: 0.823 test_accuracy: 0.849\n",
            "[3,    80] train_loss: 0.332 train_accuracy: 0.833 test_accuracy: 0.863\n",
            "[3,    85] train_loss: 0.422 train_accuracy: 0.840 test_accuracy: 0.862\n",
            "[3,    90] train_loss: 0.502 train_accuracy: 0.853 test_accuracy: 0.840\n",
            "[3,    95] train_loss: 0.796 train_accuracy: 0.824 test_accuracy: 0.792\n",
            "[3,   100] train_loss: 0.492 train_accuracy: 0.819 test_accuracy: 0.795\n",
            "[3,   105] train_loss: 0.702 train_accuracy: 0.822 test_accuracy: 0.816\n",
            "[3,   110] train_loss: 0.376 train_accuracy: 0.817 test_accuracy: 0.825\n",
            "[3,   115] train_loss: 0.503 train_accuracy: 0.837 test_accuracy: 0.835\n",
            "[3,   120] train_loss: 0.772 train_accuracy: 0.853 test_accuracy: 0.842\n",
            "[3,   125] train_loss: 1.005 train_accuracy: 0.871 test_accuracy: 0.860\n",
            "[3,   130] train_loss: 0.324 train_accuracy: 0.866 test_accuracy: 0.850\n",
            "[3,   135] train_loss: 0.393 train_accuracy: 0.870 test_accuracy: 0.851\n",
            "[3,   140] train_loss: 0.684 train_accuracy: 0.862 test_accuracy: 0.841\n",
            "[3,   145] train_loss: 0.903 train_accuracy: 0.850 test_accuracy: 0.829\n",
            "[3,   150] train_loss: 0.626 train_accuracy: 0.807 test_accuracy: 0.792\n",
            "[3,   155] train_loss: 0.681 train_accuracy: 0.836 test_accuracy: 0.825\n",
            "[3,   160] train_loss: 0.423 train_accuracy: 0.851 test_accuracy: 0.828\n",
            "[3,   165] train_loss: 0.355 train_accuracy: 0.834 test_accuracy: 0.804\n",
            "[3,   170] train_loss: 0.424 train_accuracy: 0.823 test_accuracy: 0.797\n",
            "[3,   175] train_loss: 0.408 train_accuracy: 0.819 test_accuracy: 0.798\n",
            "[3,   180] train_loss: 0.829 train_accuracy: 0.800 test_accuracy: 0.769\n",
            "[3,   185] train_loss: 0.614 train_accuracy: 0.792 test_accuracy: 0.754\n",
            "[3,   190] train_loss: 0.487 train_accuracy: 0.785 test_accuracy: 0.753\n",
            "[3,   195] train_loss: 0.589 train_accuracy: 0.802 test_accuracy: 0.775\n",
            "[3,   200] train_loss: 0.756 train_accuracy: 0.808 test_accuracy: 0.774\n",
            "[3,   205] train_loss: 0.597 train_accuracy: 0.821 test_accuracy: 0.804\n",
            "[3,   210] train_loss: 0.196 train_accuracy: 0.807 test_accuracy: 0.807\n",
            "[3,   215] train_loss: 0.260 train_accuracy: 0.817 test_accuracy: 0.823\n",
            "[3,   220] train_loss: 0.484 train_accuracy: 0.840 test_accuracy: 0.836\n",
            "[3,   225] train_loss: 0.250 train_accuracy: 0.838 test_accuracy: 0.821\n",
            "[3,   230] train_loss: 0.366 train_accuracy: 0.837 test_accuracy: 0.823\n",
            "[3,   235] train_loss: 0.342 train_accuracy: 0.806 test_accuracy: 0.801\n",
            "[3,   240] train_loss: 0.325 train_accuracy: 0.798 test_accuracy: 0.805\n",
            "[3,   245] train_loss: 0.821 train_accuracy: 0.831 test_accuracy: 0.825\n",
            "[3,   250] train_loss: 0.696 train_accuracy: 0.837 test_accuracy: 0.807\n",
            "[3,   255] train_loss: 0.648 train_accuracy: 0.804 test_accuracy: 0.762\n",
            "[3,   260] train_loss: 0.703 train_accuracy: 0.755 test_accuracy: 0.727\n",
            "[3,   265] train_loss: 0.967 train_accuracy: 0.787 test_accuracy: 0.783\n",
            "[3,   270] train_loss: 0.347 train_accuracy: 0.802 test_accuracy: 0.802\n",
            "[3,   275] train_loss: 0.513 train_accuracy: 0.810 test_accuracy: 0.801\n",
            "[3,   280] train_loss: 0.608 train_accuracy: 0.847 test_accuracy: 0.828\n",
            "[3,   285] train_loss: 0.696 train_accuracy: 0.839 test_accuracy: 0.810\n",
            "[3,   290] train_loss: 0.483 train_accuracy: 0.816 test_accuracy: 0.795\n",
            "[3,   295] train_loss: 0.560 train_accuracy: 0.832 test_accuracy: 0.808\n",
            "[3,   300] train_loss: 0.746 train_accuracy: 0.794 test_accuracy: 0.792\n",
            "[3,   305] train_loss: 0.566 train_accuracy: 0.800 test_accuracy: 0.790\n",
            "[3,   310] train_loss: 0.409 train_accuracy: 0.798 test_accuracy: 0.781\n",
            "[3,   315] train_loss: 0.284 train_accuracy: 0.791 test_accuracy: 0.772\n",
            "[3,   320] train_loss: 0.441 train_accuracy: 0.822 test_accuracy: 0.802\n",
            "[3,   325] train_loss: 0.239 train_accuracy: 0.837 test_accuracy: 0.817\n",
            "[3,   330] train_loss: 0.339 train_accuracy: 0.827 test_accuracy: 0.823\n",
            "[3,   335] train_loss: 0.482 train_accuracy: 0.785 test_accuracy: 0.798\n",
            "[3,   340] train_loss: 0.760 train_accuracy: 0.851 test_accuracy: 0.858\n",
            "[3,   345] train_loss: 0.423 train_accuracy: 0.818 test_accuracy: 0.835\n",
            "[3,   350] train_loss: 0.676 train_accuracy: 0.775 test_accuracy: 0.793\n",
            "[3,   355] train_loss: 1.098 train_accuracy: 0.760 test_accuracy: 0.769\n",
            "[3,   360] train_loss: 0.739 train_accuracy: 0.785 test_accuracy: 0.775\n",
            "[3,   365] train_loss: 0.853 train_accuracy: 0.775 test_accuracy: 0.751\n",
            "[3,   370] train_loss: 0.534 train_accuracy: 0.784 test_accuracy: 0.752\n",
            "[3,   375] train_loss: 0.681 train_accuracy: 0.809 test_accuracy: 0.787\n",
            "[3,   380] train_loss: 1.148 train_accuracy: 0.842 test_accuracy: 0.818\n",
            "[3,   385] train_loss: 0.837 train_accuracy: 0.786 test_accuracy: 0.756\n",
            "[3,   390] train_loss: 0.974 train_accuracy: 0.803 test_accuracy: 0.758\n",
            "[3,   395] train_loss: 0.557 train_accuracy: 0.795 test_accuracy: 0.771\n",
            "[3,   400] train_loss: 0.137 train_accuracy: 0.795 test_accuracy: 0.786\n",
            "[3,   405] train_loss: 0.338 train_accuracy: 0.830 test_accuracy: 0.828\n",
            "[3,   410] train_loss: 0.444 train_accuracy: 0.821 test_accuracy: 0.834\n",
            "[3,   415] train_loss: 0.658 train_accuracy: 0.812 test_accuracy: 0.825\n",
            "[3,   420] train_loss: 0.756 train_accuracy: 0.802 test_accuracy: 0.794\n",
            "[3,   425] train_loss: 1.013 train_accuracy: 0.753 test_accuracy: 0.733\n",
            "[3,   430] train_loss: 0.682 train_accuracy: 0.690 test_accuracy: 0.661\n",
            "[3,   435] train_loss: 1.237 train_accuracy: 0.754 test_accuracy: 0.724\n",
            "[3,   440] train_loss: 0.863 train_accuracy: 0.753 test_accuracy: 0.718\n",
            "[3,   445] train_loss: 1.707 train_accuracy: 0.783 test_accuracy: 0.774\n",
            "[3,   450] train_loss: 0.593 train_accuracy: 0.742 test_accuracy: 0.763\n",
            "[3,   455] train_loss: 0.681 train_accuracy: 0.575 test_accuracy: 0.617\n",
            "[3,   460] train_loss: 1.751 train_accuracy: 0.716 test_accuracy: 0.728\n",
            "[3,   465] train_loss: 1.362 train_accuracy: 0.670 test_accuracy: 0.711\n",
            "[3,   470] train_loss: 1.000 train_accuracy: 0.685 test_accuracy: 0.698\n",
            "[3,   475] train_loss: 1.371 train_accuracy: 0.601 test_accuracy: 0.569\n",
            "[3,   480] train_loss: 1.170 train_accuracy: 0.668 test_accuracy: 0.664\n",
            "[3,   485] train_loss: 1.129 train_accuracy: 0.670 test_accuracy: 0.642\n",
            "[3,   490] train_loss: 1.405 train_accuracy: 0.713 test_accuracy: 0.705\n",
            "[3,   495] train_loss: 1.574 train_accuracy: 0.768 test_accuracy: 0.795\n",
            "[3,   500] train_loss: 0.769 train_accuracy: 0.640 test_accuracy: 0.670\n",
            "[3,   505] train_loss: 1.130 train_accuracy: 0.609 test_accuracy: 0.616\n",
            "[3,   510] train_loss: 1.621 train_accuracy: 0.702 test_accuracy: 0.699\n",
            "[3,   515] train_loss: 1.188 train_accuracy: 0.686 test_accuracy: 0.711\n",
            "[3,   520] train_loss: 0.591 train_accuracy: 0.625 test_accuracy: 0.651\n",
            "[3,   525] train_loss: 2.078 train_accuracy: 0.718 test_accuracy: 0.723\n",
            "[3,   530] train_loss: 2.054 train_accuracy: 0.658 test_accuracy: 0.614\n",
            "[3,   535] train_loss: 0.885 train_accuracy: 0.637 test_accuracy: 0.586\n",
            "[3,   540] train_loss: 1.185 train_accuracy: 0.720 test_accuracy: 0.698\n",
            "[3,   545] train_loss: 1.289 train_accuracy: 0.691 test_accuracy: 0.733\n",
            "[3,   550] train_loss: 1.088 train_accuracy: 0.634 test_accuracy: 0.629\n",
            "[3,   555] train_loss: 1.587 train_accuracy: 0.627 test_accuracy: 0.620\n",
            "[3,   560] train_loss: 1.387 train_accuracy: 0.725 test_accuracy: 0.715\n",
            "[3,   565] train_loss: 1.157 train_accuracy: 0.681 test_accuracy: 0.646\n",
            "[3,   570] train_loss: 0.863 train_accuracy: 0.686 test_accuracy: 0.672\n",
            "[3,   575] train_loss: 0.907 train_accuracy: 0.776 test_accuracy: 0.762\n",
            "[3,   580] train_loss: 0.590 train_accuracy: 0.795 test_accuracy: 0.788\n",
            "[3,   585] train_loss: 0.946 train_accuracy: 0.762 test_accuracy: 0.772\n",
            "[3,   590] train_loss: 0.724 train_accuracy: 0.769 test_accuracy: 0.779\n",
            "[3,   595] train_loss: 0.936 train_accuracy: 0.771 test_accuracy: 0.749\n",
            "[3,   600] train_loss: 0.759 train_accuracy: 0.719 test_accuracy: 0.681\n",
            "[3,   605] train_loss: 0.877 train_accuracy: 0.710 test_accuracy: 0.665\n",
            "[3,   610] train_loss: 0.689 train_accuracy: 0.723 test_accuracy: 0.694\n",
            "[3,   615] train_loss: 0.786 train_accuracy: 0.660 test_accuracy: 0.624\n",
            "[3,   620] train_loss: 1.423 train_accuracy: 0.766 test_accuracy: 0.746\n",
            "[3,   625] train_loss: 0.581 train_accuracy: 0.829 test_accuracy: 0.809\n",
            "[3,   630] train_loss: 0.758 train_accuracy: 0.835 test_accuracy: 0.823\n",
            "[3,   635] train_loss: 0.438 train_accuracy: 0.830 test_accuracy: 0.819\n",
            "[3,   640] train_loss: 0.356 train_accuracy: 0.835 test_accuracy: 0.824\n",
            "[3,   645] train_loss: 0.245 train_accuracy: 0.848 test_accuracy: 0.833\n",
            "[3,   650] train_loss: 0.309 train_accuracy: 0.856 test_accuracy: 0.833\n",
            "[3,   655] train_loss: 0.121 train_accuracy: 0.823 test_accuracy: 0.789\n",
            "[3,   660] train_loss: 0.440 train_accuracy: 0.826 test_accuracy: 0.804\n",
            "[3,   665] train_loss: 0.406 train_accuracy: 0.828 test_accuracy: 0.810\n",
            "[3,   670] train_loss: 0.507 train_accuracy: 0.828 test_accuracy: 0.809\n",
            "[3,   675] train_loss: 0.955 train_accuracy: 0.830 test_accuracy: 0.810\n",
            "[3,   680] train_loss: 0.671 train_accuracy: 0.835 test_accuracy: 0.809\n",
            "[3,   685] train_loss: 0.446 train_accuracy: 0.827 test_accuracy: 0.807\n",
            "[3,   690] train_loss: 0.575 train_accuracy: 0.817 test_accuracy: 0.801\n",
            "[3,   695] train_loss: 0.625 train_accuracy: 0.813 test_accuracy: 0.793\n",
            "[3,   700] train_loss: 0.585 train_accuracy: 0.781 test_accuracy: 0.774\n",
            "[3,   705] train_loss: 0.901 train_accuracy: 0.796 test_accuracy: 0.773\n",
            "[3,   710] train_loss: 0.548 train_accuracy: 0.756 test_accuracy: 0.746\n",
            "[3,   715] train_loss: 0.543 train_accuracy: 0.765 test_accuracy: 0.763\n",
            "[3,   720] train_loss: 0.647 train_accuracy: 0.780 test_accuracy: 0.782\n",
            "[3,   725] train_loss: 0.495 train_accuracy: 0.762 test_accuracy: 0.756\n",
            "[3,   730] train_loss: 0.464 train_accuracy: 0.808 test_accuracy: 0.796\n",
            "[3,   735] train_loss: 0.869 train_accuracy: 0.811 test_accuracy: 0.794\n",
            "[3,   740] train_loss: 0.523 train_accuracy: 0.768 test_accuracy: 0.749\n",
            "[3,   745] train_loss: 0.505 train_accuracy: 0.816 test_accuracy: 0.801\n",
            "[3,   750] train_loss: 0.433 train_accuracy: 0.810 test_accuracy: 0.800\n",
            "[3,   755] train_loss: 0.526 train_accuracy: 0.815 test_accuracy: 0.801\n",
            "[3,   760] train_loss: 0.976 train_accuracy: 0.836 test_accuracy: 0.814\n",
            "[3,   765] train_loss: 0.601 train_accuracy: 0.847 test_accuracy: 0.824\n",
            "[3,   770] train_loss: 0.535 train_accuracy: 0.860 test_accuracy: 0.839\n",
            "[3,   775] train_loss: 0.426 train_accuracy: 0.867 test_accuracy: 0.848\n",
            "[3,   780] train_loss: 0.323 train_accuracy: 0.859 test_accuracy: 0.837\n",
            "[3,   785] train_loss: 0.719 train_accuracy: 0.852 test_accuracy: 0.837\n",
            "[3,   790] train_loss: 0.452 train_accuracy: 0.838 test_accuracy: 0.830\n",
            "[3,   795] train_loss: 0.521 train_accuracy: 0.846 test_accuracy: 0.849\n",
            "[3,   800] train_loss: 0.752 train_accuracy: 0.856 test_accuracy: 0.855\n",
            "[3,   805] train_loss: 0.422 train_accuracy: 0.873 test_accuracy: 0.862\n",
            "[3,   810] train_loss: 0.460 train_accuracy: 0.872 test_accuracy: 0.855\n",
            "[3,   815] train_loss: 1.137 train_accuracy: 0.867 test_accuracy: 0.851\n",
            "[3,   820] train_loss: 0.305 train_accuracy: 0.838 test_accuracy: 0.817\n",
            "[3,   825] train_loss: 0.736 train_accuracy: 0.847 test_accuracy: 0.825\n",
            "[3,   830] train_loss: 0.695 train_accuracy: 0.850 test_accuracy: 0.840\n",
            "[3,   835] train_loss: 0.463 train_accuracy: 0.833 test_accuracy: 0.841\n",
            "[3,   840] train_loss: 0.667 train_accuracy: 0.841 test_accuracy: 0.854\n",
            "[3,   845] train_loss: 0.340 train_accuracy: 0.837 test_accuracy: 0.857\n",
            "[3,   850] train_loss: 0.187 train_accuracy: 0.828 test_accuracy: 0.839\n",
            "[3,   855] train_loss: 0.460 train_accuracy: 0.828 test_accuracy: 0.824\n",
            "[3,   860] train_loss: 0.480 train_accuracy: 0.843 test_accuracy: 0.837\n",
            "[3,   865] train_loss: 0.554 train_accuracy: 0.868 test_accuracy: 0.861\n",
            "[3,   870] train_loss: 0.572 train_accuracy: 0.876 test_accuracy: 0.869\n",
            "[3,   875] train_loss: 0.345 train_accuracy: 0.882 test_accuracy: 0.864\n",
            "[3,   880] train_loss: 0.617 train_accuracy: 0.888 test_accuracy: 0.858\n",
            "[3,   885] train_loss: 0.662 train_accuracy: 0.877 test_accuracy: 0.848\n",
            "[3,   890] train_loss: 0.656 train_accuracy: 0.829 test_accuracy: 0.809\n",
            "[3,   895] train_loss: 0.424 train_accuracy: 0.816 test_accuracy: 0.794\n",
            "[3,   900] train_loss: 0.482 train_accuracy: 0.823 test_accuracy: 0.805\n",
            "[3,   905] train_loss: 0.670 train_accuracy: 0.827 test_accuracy: 0.813\n",
            "[3,   910] train_loss: 0.372 train_accuracy: 0.833 test_accuracy: 0.825\n",
            "[3,   915] train_loss: 0.345 train_accuracy: 0.836 test_accuracy: 0.832\n",
            "[3,   920] train_loss: 0.316 train_accuracy: 0.849 test_accuracy: 0.843\n",
            "[3,   925] train_loss: 0.491 train_accuracy: 0.861 test_accuracy: 0.852\n",
            "[3,   930] train_loss: 0.497 train_accuracy: 0.844 test_accuracy: 0.843\n",
            "[3,   935] train_loss: 0.945 train_accuracy: 0.855 test_accuracy: 0.858\n",
            "[3,   940] train_loss: 0.297 train_accuracy: 0.863 test_accuracy: 0.858\n",
            "[3,   945] train_loss: 0.895 train_accuracy: 0.866 test_accuracy: 0.855\n",
            "[3,   950] train_loss: 0.234 train_accuracy: 0.863 test_accuracy: 0.841\n",
            "[3,   955] train_loss: 0.438 train_accuracy: 0.867 test_accuracy: 0.834\n",
            "[3,   960] train_loss: 0.386 train_accuracy: 0.873 test_accuracy: 0.842\n",
            "[3,   965] train_loss: 0.352 train_accuracy: 0.876 test_accuracy: 0.849\n",
            "[3,   970] train_loss: 0.462 train_accuracy: 0.867 test_accuracy: 0.838\n",
            "[3,   975] train_loss: 0.348 train_accuracy: 0.844 test_accuracy: 0.814\n",
            "[3,   980] train_loss: 0.574 train_accuracy: 0.844 test_accuracy: 0.814\n",
            "[3,   985] train_loss: 0.387 train_accuracy: 0.852 test_accuracy: 0.818\n",
            "[3,   990] train_loss: 0.562 train_accuracy: 0.858 test_accuracy: 0.829\n",
            "[3,   995] train_loss: 0.470 train_accuracy: 0.857 test_accuracy: 0.827\n",
            "[3,  1000] train_loss: 0.422 train_accuracy: 0.846 test_accuracy: 0.825\n",
            "[3,  1005] train_loss: 0.641 train_accuracy: 0.844 test_accuracy: 0.832\n",
            "[3,  1010] train_loss: 0.841 train_accuracy: 0.854 test_accuracy: 0.851\n",
            "[3,  1015] train_loss: 0.535 train_accuracy: 0.849 test_accuracy: 0.852\n",
            "[3,  1020] train_loss: 0.728 train_accuracy: 0.838 test_accuracy: 0.843\n",
            "[3,  1025] train_loss: 0.485 train_accuracy: 0.846 test_accuracy: 0.841\n",
            "[3,  1030] train_loss: 0.770 train_accuracy: 0.855 test_accuracy: 0.837\n",
            "[3,  1035] train_loss: 0.444 train_accuracy: 0.860 test_accuracy: 0.841\n",
            "[3,  1040] train_loss: 0.391 train_accuracy: 0.854 test_accuracy: 0.829\n",
            "[3,  1045] train_loss: 0.912 train_accuracy: 0.843 test_accuracy: 0.818\n",
            "[3,  1050] train_loss: 0.577 train_accuracy: 0.833 test_accuracy: 0.811\n",
            "[3,  1055] train_loss: 0.185 train_accuracy: 0.824 test_accuracy: 0.811\n",
            "[3,  1060] train_loss: 0.588 train_accuracy: 0.834 test_accuracy: 0.822\n",
            "[3,  1065] train_loss: 0.794 train_accuracy: 0.837 test_accuracy: 0.829\n",
            "[3,  1070] train_loss: 0.334 train_accuracy: 0.839 test_accuracy: 0.833\n",
            "[3,  1075] train_loss: 0.640 train_accuracy: 0.849 test_accuracy: 0.835\n",
            "[3,  1080] train_loss: 0.797 train_accuracy: 0.860 test_accuracy: 0.840\n",
            "[3,  1085] train_loss: 0.603 train_accuracy: 0.851 test_accuracy: 0.832\n",
            "[3,  1090] train_loss: 0.399 train_accuracy: 0.877 test_accuracy: 0.860\n",
            "[3,  1095] train_loss: 0.439 train_accuracy: 0.879 test_accuracy: 0.867\n",
            "[3,  1100] train_loss: 0.355 train_accuracy: 0.880 test_accuracy: 0.859\n",
            "[3,  1105] train_loss: 0.731 train_accuracy: 0.881 test_accuracy: 0.859\n",
            "[3,  1110] train_loss: 0.532 train_accuracy: 0.878 test_accuracy: 0.859\n",
            "[3,  1115] train_loss: 0.184 train_accuracy: 0.873 test_accuracy: 0.852\n",
            "[3,  1120] train_loss: 0.286 train_accuracy: 0.865 test_accuracy: 0.839\n",
            "[3,  1125] train_loss: 0.494 train_accuracy: 0.863 test_accuracy: 0.842\n",
            "[3,  1130] train_loss: 0.595 train_accuracy: 0.865 test_accuracy: 0.840\n",
            "[3,  1135] train_loss: 0.845 train_accuracy: 0.874 test_accuracy: 0.846\n",
            "[3,  1140] train_loss: 0.436 train_accuracy: 0.874 test_accuracy: 0.848\n",
            "[3,  1145] train_loss: 0.799 train_accuracy: 0.867 test_accuracy: 0.848\n",
            "[3,  1150] train_loss: 0.430 train_accuracy: 0.866 test_accuracy: 0.845\n",
            "[3,  1155] train_loss: 0.637 train_accuracy: 0.866 test_accuracy: 0.836\n",
            "[3,  1160] train_loss: 0.926 train_accuracy: 0.864 test_accuracy: 0.835\n",
            "[3,  1165] train_loss: 0.558 train_accuracy: 0.843 test_accuracy: 0.809\n",
            "[3,  1170] train_loss: 0.356 train_accuracy: 0.835 test_accuracy: 0.807\n",
            "[3,  1175] train_loss: 0.582 train_accuracy: 0.831 test_accuracy: 0.818\n",
            "[3,  1180] train_loss: 0.358 train_accuracy: 0.827 test_accuracy: 0.821\n",
            "[3,  1185] train_loss: 0.461 train_accuracy: 0.830 test_accuracy: 0.828\n",
            "[3,  1190] train_loss: 0.361 train_accuracy: 0.838 test_accuracy: 0.832\n",
            "[3,  1195] train_loss: 0.756 train_accuracy: 0.859 test_accuracy: 0.843\n",
            "[3,  1200] train_loss: 0.493 train_accuracy: 0.854 test_accuracy: 0.842\n",
            "[3,  1205] train_loss: 0.712 train_accuracy: 0.865 test_accuracy: 0.844\n",
            "[3,  1210] train_loss: 0.331 train_accuracy: 0.852 test_accuracy: 0.826\n",
            "[3,  1215] train_loss: 0.232 train_accuracy: 0.815 test_accuracy: 0.799\n",
            "[3,  1220] train_loss: 1.047 train_accuracy: 0.843 test_accuracy: 0.821\n",
            "[3,  1225] train_loss: 0.180 train_accuracy: 0.858 test_accuracy: 0.834\n",
            "[3,  1230] train_loss: 0.580 train_accuracy: 0.846 test_accuracy: 0.838\n",
            "[3,  1235] train_loss: 1.135 train_accuracy: 0.862 test_accuracy: 0.857\n",
            "[3,  1240] train_loss: 0.372 train_accuracy: 0.841 test_accuracy: 0.839\n",
            "[3,  1245] train_loss: 0.305 train_accuracy: 0.838 test_accuracy: 0.827\n",
            "[3,  1250] train_loss: 0.649 train_accuracy: 0.868 test_accuracy: 0.840\n",
            "[3,  1255] train_loss: 0.437 train_accuracy: 0.850 test_accuracy: 0.823\n",
            "[3,  1260] train_loss: 0.156 train_accuracy: 0.847 test_accuracy: 0.819\n",
            "[3,  1265] train_loss: 0.213 train_accuracy: 0.836 test_accuracy: 0.811\n",
            "[3,  1270] train_loss: 0.357 train_accuracy: 0.840 test_accuracy: 0.812\n",
            "[3,  1275] train_loss: 0.219 train_accuracy: 0.816 test_accuracy: 0.784\n",
            "[3,  1280] train_loss: 0.643 train_accuracy: 0.817 test_accuracy: 0.791\n",
            "[3,  1285] train_loss: 0.969 train_accuracy: 0.839 test_accuracy: 0.820\n",
            "[3,  1290] train_loss: 0.917 train_accuracy: 0.862 test_accuracy: 0.841\n",
            "[3,  1295] train_loss: 0.304 train_accuracy: 0.865 test_accuracy: 0.837\n",
            "[3,  1300] train_loss: 0.562 train_accuracy: 0.855 test_accuracy: 0.832\n",
            "[3,  1305] train_loss: 0.510 train_accuracy: 0.844 test_accuracy: 0.822\n",
            "[3,  1310] train_loss: 0.405 train_accuracy: 0.848 test_accuracy: 0.828\n",
            "[3,  1315] train_loss: 0.479 train_accuracy: 0.868 test_accuracy: 0.851\n",
            "[3,  1320] train_loss: 0.294 train_accuracy: 0.870 test_accuracy: 0.849\n",
            "[3,  1325] train_loss: 0.593 train_accuracy: 0.871 test_accuracy: 0.844\n",
            "[3,  1330] train_loss: 0.372 train_accuracy: 0.857 test_accuracy: 0.833\n",
            "[3,  1335] train_loss: 0.440 train_accuracy: 0.864 test_accuracy: 0.836\n",
            "[3,  1340] train_loss: 0.800 train_accuracy: 0.860 test_accuracy: 0.832\n",
            "[3,  1345] train_loss: 0.491 train_accuracy: 0.849 test_accuracy: 0.836\n",
            "[3,  1350] train_loss: 0.402 train_accuracy: 0.838 test_accuracy: 0.836\n",
            "[3,  1355] train_loss: 0.486 train_accuracy: 0.841 test_accuracy: 0.844\n",
            "[3,  1360] train_loss: 0.746 train_accuracy: 0.864 test_accuracy: 0.858\n",
            "[3,  1365] train_loss: 0.256 train_accuracy: 0.868 test_accuracy: 0.848\n",
            "[3,  1370] train_loss: 0.871 train_accuracy: 0.867 test_accuracy: 0.845\n",
            "[3,  1375] train_loss: 0.242 train_accuracy: 0.869 test_accuracy: 0.837\n",
            "[3,  1380] train_loss: 0.560 train_accuracy: 0.851 test_accuracy: 0.818\n",
            "[3,  1385] train_loss: 0.894 train_accuracy: 0.846 test_accuracy: 0.807\n",
            "[3,  1390] train_loss: 0.813 train_accuracy: 0.864 test_accuracy: 0.830\n",
            "[3,  1395] train_loss: 0.985 train_accuracy: 0.864 test_accuracy: 0.837\n",
            "[3,  1400] train_loss: 0.946 train_accuracy: 0.853 test_accuracy: 0.838\n",
            "[3,  1405] train_loss: 0.690 train_accuracy: 0.840 test_accuracy: 0.828\n",
            "[3,  1410] train_loss: 0.441 train_accuracy: 0.824 test_accuracy: 0.808\n",
            "[3,  1415] train_loss: 0.628 train_accuracy: 0.846 test_accuracy: 0.831\n",
            "[3,  1420] train_loss: 0.346 train_accuracy: 0.877 test_accuracy: 0.851\n",
            "[3,  1425] train_loss: 0.650 train_accuracy: 0.879 test_accuracy: 0.849\n",
            "[3,  1430] train_loss: 0.493 train_accuracy: 0.848 test_accuracy: 0.815\n",
            "[3,  1435] train_loss: 0.501 train_accuracy: 0.845 test_accuracy: 0.820\n",
            "[3,  1440] train_loss: 0.375 train_accuracy: 0.862 test_accuracy: 0.842\n",
            "[3,  1445] train_loss: 0.320 train_accuracy: 0.861 test_accuracy: 0.845\n",
            "[3,  1450] train_loss: 0.544 train_accuracy: 0.851 test_accuracy: 0.823\n",
            "[3,  1455] train_loss: 0.655 train_accuracy: 0.844 test_accuracy: 0.814\n",
            "[3,  1460] train_loss: 0.839 train_accuracy: 0.852 test_accuracy: 0.825\n",
            "[3,  1465] train_loss: 0.273 train_accuracy: 0.842 test_accuracy: 0.820\n",
            "[3,  1470] train_loss: 0.797 train_accuracy: 0.848 test_accuracy: 0.829\n",
            "[3,  1475] train_loss: 0.665 train_accuracy: 0.854 test_accuracy: 0.842\n",
            "[3,  1480] train_loss: 0.403 train_accuracy: 0.855 test_accuracy: 0.848\n",
            "[3,  1485] train_loss: 0.526 train_accuracy: 0.870 test_accuracy: 0.856\n",
            "[3,  1490] train_loss: 0.583 train_accuracy: 0.863 test_accuracy: 0.843\n",
            "[3,  1495] train_loss: 0.446 train_accuracy: 0.861 test_accuracy: 0.837\n",
            "[3,  1500] train_loss: 0.245 train_accuracy: 0.868 test_accuracy: 0.843\n",
            "[3,  1505] train_loss: 0.306 train_accuracy: 0.872 test_accuracy: 0.850\n",
            "[3,  1510] train_loss: 0.331 train_accuracy: 0.874 test_accuracy: 0.845\n",
            "[3,  1515] train_loss: 0.396 train_accuracy: 0.871 test_accuracy: 0.846\n",
            "[3,  1520] train_loss: 0.130 train_accuracy: 0.864 test_accuracy: 0.840\n",
            "[3,  1525] train_loss: 0.262 train_accuracy: 0.837 test_accuracy: 0.827\n",
            "[3,  1530] train_loss: 0.400 train_accuracy: 0.846 test_accuracy: 0.835\n",
            "[3,  1535] train_loss: 0.440 train_accuracy: 0.853 test_accuracy: 0.849\n",
            "[3,  1540] train_loss: 0.423 train_accuracy: 0.865 test_accuracy: 0.863\n",
            "[3,  1545] train_loss: 0.424 train_accuracy: 0.860 test_accuracy: 0.853\n",
            "[3,  1550] train_loss: 0.386 train_accuracy: 0.865 test_accuracy: 0.850\n",
            "[3,  1555] train_loss: 0.392 train_accuracy: 0.870 test_accuracy: 0.847\n",
            "[3,  1560] train_loss: 0.403 train_accuracy: 0.857 test_accuracy: 0.835\n",
            "[3,  1565] train_loss: 0.271 train_accuracy: 0.857 test_accuracy: 0.830\n",
            "[3,  1570] train_loss: 0.966 train_accuracy: 0.872 test_accuracy: 0.840\n",
            "[3,  1575] train_loss: 0.396 train_accuracy: 0.873 test_accuracy: 0.840\n",
            "[3,  1580] train_loss: 0.510 train_accuracy: 0.878 test_accuracy: 0.846\n",
            "[3,  1585] train_loss: 0.212 train_accuracy: 0.870 test_accuracy: 0.846\n",
            "[3,  1590] train_loss: 0.149 train_accuracy: 0.858 test_accuracy: 0.840\n",
            "[3,  1595] train_loss: 0.429 train_accuracy: 0.862 test_accuracy: 0.842\n",
            "[3,  1600] train_loss: 0.344 train_accuracy: 0.862 test_accuracy: 0.841\n",
            "[3,  1605] train_loss: 0.434 train_accuracy: 0.879 test_accuracy: 0.850\n",
            "[3,  1610] train_loss: 0.381 train_accuracy: 0.876 test_accuracy: 0.840\n",
            "[3,  1615] train_loss: 0.337 train_accuracy: 0.866 test_accuracy: 0.834\n",
            "[3,  1620] train_loss: 0.442 train_accuracy: 0.853 test_accuracy: 0.829\n",
            "[3,  1625] train_loss: 0.642 train_accuracy: 0.852 test_accuracy: 0.828\n",
            "[3,  1630] train_loss: 0.597 train_accuracy: 0.855 test_accuracy: 0.829\n",
            "[3,  1635] train_loss: 0.307 train_accuracy: 0.854 test_accuracy: 0.831\n",
            "[3,  1640] train_loss: 0.616 train_accuracy: 0.857 test_accuracy: 0.828\n",
            "[3,  1645] train_loss: 0.402 train_accuracy: 0.866 test_accuracy: 0.835\n",
            "[3,  1650] train_loss: 0.602 train_accuracy: 0.848 test_accuracy: 0.818\n",
            "[3,  1655] train_loss: 0.268 train_accuracy: 0.821 test_accuracy: 0.790\n",
            "[3,  1660] train_loss: 0.640 train_accuracy: 0.796 test_accuracy: 0.760\n",
            "[3,  1665] train_loss: 0.912 train_accuracy: 0.782 test_accuracy: 0.748\n",
            "[3,  1670] train_loss: 0.581 train_accuracy: 0.814 test_accuracy: 0.780\n",
            "[3,  1675] train_loss: 1.037 train_accuracy: 0.844 test_accuracy: 0.815\n",
            "[3,  1680] train_loss: 0.366 train_accuracy: 0.829 test_accuracy: 0.804\n",
            "[3,  1685] train_loss: 0.564 train_accuracy: 0.857 test_accuracy: 0.837\n",
            "[3,  1690] train_loss: 0.373 train_accuracy: 0.862 test_accuracy: 0.842\n",
            "[3,  1695] train_loss: 0.607 train_accuracy: 0.866 test_accuracy: 0.852\n",
            "[3,  1700] train_loss: 0.586 train_accuracy: 0.843 test_accuracy: 0.832\n",
            "[3,  1705] train_loss: 0.347 train_accuracy: 0.849 test_accuracy: 0.839\n",
            "[3,  1710] train_loss: 0.328 train_accuracy: 0.867 test_accuracy: 0.855\n",
            "[3,  1715] train_loss: 0.586 train_accuracy: 0.869 test_accuracy: 0.855\n",
            "[3,  1720] train_loss: 0.194 train_accuracy: 0.856 test_accuracy: 0.839\n",
            "[3,  1725] train_loss: 0.429 train_accuracy: 0.849 test_accuracy: 0.836\n",
            "[4,     5] train_loss: 0.252 train_accuracy: 0.850 test_accuracy: 0.836\n",
            "[4,    10] train_loss: 0.192 train_accuracy: 0.854 test_accuracy: 0.839\n",
            "[4,    15] train_loss: 0.765 train_accuracy: 0.867 test_accuracy: 0.850\n",
            "[4,    20] train_loss: 0.663 train_accuracy: 0.868 test_accuracy: 0.846\n",
            "[4,    25] train_loss: 0.527 train_accuracy: 0.863 test_accuracy: 0.846\n",
            "[4,    30] train_loss: 0.292 train_accuracy: 0.849 test_accuracy: 0.834\n",
            "[4,    35] train_loss: 0.264 train_accuracy: 0.839 test_accuracy: 0.835\n",
            "[4,    40] train_loss: 0.531 train_accuracy: 0.843 test_accuracy: 0.852\n",
            "[4,    45] train_loss: 0.424 train_accuracy: 0.851 test_accuracy: 0.863\n",
            "[4,    50] train_loss: 0.178 train_accuracy: 0.853 test_accuracy: 0.861\n",
            "[4,    55] train_loss: 0.611 train_accuracy: 0.868 test_accuracy: 0.856\n",
            "[4,    60] train_loss: 0.355 train_accuracy: 0.877 test_accuracy: 0.854\n",
            "[4,    65] train_loss: 0.730 train_accuracy: 0.871 test_accuracy: 0.844\n",
            "[4,    70] train_loss: 0.196 train_accuracy: 0.856 test_accuracy: 0.826\n",
            "[4,    75] train_loss: 0.599 train_accuracy: 0.852 test_accuracy: 0.820\n",
            "[4,    80] train_loss: 0.403 train_accuracy: 0.856 test_accuracy: 0.815\n",
            "[4,    85] train_loss: 1.242 train_accuracy: 0.882 test_accuracy: 0.843\n",
            "[4,    90] train_loss: 0.229 train_accuracy: 0.871 test_accuracy: 0.841\n",
            "[4,    95] train_loss: 0.238 train_accuracy: 0.858 test_accuracy: 0.833\n",
            "[4,   100] train_loss: 0.490 train_accuracy: 0.862 test_accuracy: 0.845\n",
            "[4,   105] train_loss: 0.363 train_accuracy: 0.856 test_accuracy: 0.851\n",
            "[4,   110] train_loss: 1.152 train_accuracy: 0.857 test_accuracy: 0.842\n",
            "[4,   115] train_loss: 0.312 train_accuracy: 0.860 test_accuracy: 0.840\n",
            "[4,   120] train_loss: 0.807 train_accuracy: 0.849 test_accuracy: 0.834\n",
            "[4,   125] train_loss: 0.176 train_accuracy: 0.847 test_accuracy: 0.836\n",
            "[4,   130] train_loss: 0.320 train_accuracy: 0.876 test_accuracy: 0.862\n",
            "[4,   135] train_loss: 0.232 train_accuracy: 0.882 test_accuracy: 0.862\n",
            "[4,   140] train_loss: 0.705 train_accuracy: 0.878 test_accuracy: 0.857\n",
            "[4,   145] train_loss: 0.397 train_accuracy: 0.870 test_accuracy: 0.852\n",
            "[4,   150] train_loss: 0.239 train_accuracy: 0.868 test_accuracy: 0.853\n",
            "[4,   155] train_loss: 0.360 train_accuracy: 0.865 test_accuracy: 0.852\n",
            "[4,   160] train_loss: 0.720 train_accuracy: 0.860 test_accuracy: 0.849\n",
            "[4,   165] train_loss: 0.390 train_accuracy: 0.869 test_accuracy: 0.850\n",
            "[4,   170] train_loss: 0.198 train_accuracy: 0.870 test_accuracy: 0.836\n",
            "[4,   175] train_loss: 0.356 train_accuracy: 0.856 test_accuracy: 0.817\n",
            "[4,   180] train_loss: 0.427 train_accuracy: 0.828 test_accuracy: 0.789\n",
            "[4,   185] train_loss: 0.557 train_accuracy: 0.851 test_accuracy: 0.809\n",
            "[4,   190] train_loss: 0.442 train_accuracy: 0.868 test_accuracy: 0.841\n",
            "[4,   195] train_loss: 0.245 train_accuracy: 0.876 test_accuracy: 0.859\n",
            "[4,   200] train_loss: 0.212 train_accuracy: 0.875 test_accuracy: 0.862\n",
            "[4,   205] train_loss: 0.758 train_accuracy: 0.874 test_accuracy: 0.858\n",
            "[4,   210] train_loss: 0.496 train_accuracy: 0.840 test_accuracy: 0.826\n",
            "[4,   215] train_loss: 0.646 train_accuracy: 0.816 test_accuracy: 0.797\n",
            "[4,   220] train_loss: 0.576 train_accuracy: 0.850 test_accuracy: 0.828\n",
            "[4,   225] train_loss: 0.492 train_accuracy: 0.850 test_accuracy: 0.828\n",
            "[4,   230] train_loss: 0.284 train_accuracy: 0.840 test_accuracy: 0.818\n",
            "[4,   235] train_loss: 0.163 train_accuracy: 0.838 test_accuracy: 0.815\n",
            "[4,   240] train_loss: 0.255 train_accuracy: 0.827 test_accuracy: 0.802\n",
            "[4,   245] train_loss: 0.410 train_accuracy: 0.843 test_accuracy: 0.819\n",
            "[4,   250] train_loss: 0.763 train_accuracy: 0.848 test_accuracy: 0.828\n",
            "[4,   255] train_loss: 0.662 train_accuracy: 0.836 test_accuracy: 0.810\n",
            "[4,   260] train_loss: 0.299 train_accuracy: 0.830 test_accuracy: 0.800\n",
            "[4,   265] train_loss: 0.623 train_accuracy: 0.868 test_accuracy: 0.846\n",
            "[4,   270] train_loss: 0.238 train_accuracy: 0.884 test_accuracy: 0.861\n",
            "[4,   275] train_loss: 0.208 train_accuracy: 0.877 test_accuracy: 0.863\n",
            "[4,   280] train_loss: 0.431 train_accuracy: 0.867 test_accuracy: 0.861\n",
            "[4,   285] train_loss: 0.374 train_accuracy: 0.858 test_accuracy: 0.839\n",
            "[4,   290] train_loss: 0.235 train_accuracy: 0.847 test_accuracy: 0.817\n",
            "[4,   295] train_loss: 0.265 train_accuracy: 0.836 test_accuracy: 0.807\n",
            "[4,   300] train_loss: 0.546 train_accuracy: 0.838 test_accuracy: 0.816\n",
            "[4,   305] train_loss: 0.203 train_accuracy: 0.844 test_accuracy: 0.825\n",
            "[4,   310] train_loss: 0.922 train_accuracy: 0.857 test_accuracy: 0.836\n",
            "[4,   315] train_loss: 0.225 train_accuracy: 0.836 test_accuracy: 0.819\n",
            "[4,   320] train_loss: 0.582 train_accuracy: 0.868 test_accuracy: 0.854\n",
            "[4,   325] train_loss: 0.171 train_accuracy: 0.880 test_accuracy: 0.872\n",
            "[4,   330] train_loss: 0.785 train_accuracy: 0.880 test_accuracy: 0.866\n",
            "[4,   335] train_loss: 0.800 train_accuracy: 0.872 test_accuracy: 0.857\n",
            "[4,   340] train_loss: 0.461 train_accuracy: 0.865 test_accuracy: 0.845\n",
            "[4,   345] train_loss: 0.798 train_accuracy: 0.852 test_accuracy: 0.832\n",
            "[4,   350] train_loss: 0.583 train_accuracy: 0.833 test_accuracy: 0.813\n",
            "[4,   355] train_loss: 0.488 train_accuracy: 0.825 test_accuracy: 0.814\n",
            "[4,   360] train_loss: 0.291 train_accuracy: 0.835 test_accuracy: 0.822\n",
            "[4,   365] train_loss: 0.532 train_accuracy: 0.843 test_accuracy: 0.828\n",
            "[4,   370] train_loss: 0.432 train_accuracy: 0.847 test_accuracy: 0.831\n",
            "[4,   375] train_loss: 0.311 train_accuracy: 0.859 test_accuracy: 0.829\n",
            "[4,   380] train_loss: 0.252 train_accuracy: 0.855 test_accuracy: 0.819\n",
            "[4,   385] train_loss: 0.940 train_accuracy: 0.846 test_accuracy: 0.807\n",
            "[4,   390] train_loss: 0.644 train_accuracy: 0.857 test_accuracy: 0.823\n",
            "[4,   395] train_loss: 0.545 train_accuracy: 0.841 test_accuracy: 0.815\n",
            "[4,   400] train_loss: 0.499 train_accuracy: 0.833 test_accuracy: 0.803\n",
            "[4,   405] train_loss: 0.785 train_accuracy: 0.856 test_accuracy: 0.817\n",
            "[4,   410] train_loss: 0.503 train_accuracy: 0.870 test_accuracy: 0.835\n",
            "[4,   415] train_loss: 0.686 train_accuracy: 0.868 test_accuracy: 0.841\n",
            "[4,   420] train_loss: 0.332 train_accuracy: 0.862 test_accuracy: 0.839\n",
            "[4,   425] train_loss: 0.391 train_accuracy: 0.842 test_accuracy: 0.808\n",
            "[4,   430] train_loss: 0.246 train_accuracy: 0.830 test_accuracy: 0.792\n",
            "[4,   435] train_loss: 0.529 train_accuracy: 0.850 test_accuracy: 0.820\n",
            "[4,   440] train_loss: 0.289 train_accuracy: 0.846 test_accuracy: 0.844\n",
            "[4,   445] train_loss: 0.566 train_accuracy: 0.850 test_accuracy: 0.845\n",
            "[4,   450] train_loss: 0.479 train_accuracy: 0.855 test_accuracy: 0.836\n",
            "[4,   455] train_loss: 0.508 train_accuracy: 0.845 test_accuracy: 0.825\n",
            "[4,   460] train_loss: 0.306 train_accuracy: 0.824 test_accuracy: 0.811\n",
            "[4,   465] train_loss: 1.335 train_accuracy: 0.787 test_accuracy: 0.789\n",
            "[4,   470] train_loss: 0.503 train_accuracy: 0.742 test_accuracy: 0.742\n",
            "[4,   475] train_loss: 0.756 train_accuracy: 0.792 test_accuracy: 0.762\n",
            "[4,   480] train_loss: 0.938 train_accuracy: 0.816 test_accuracy: 0.790\n",
            "[4,   485] train_loss: 0.484 train_accuracy: 0.808 test_accuracy: 0.779\n",
            "[4,   490] train_loss: 0.475 train_accuracy: 0.821 test_accuracy: 0.797\n",
            "[4,   495] train_loss: 0.410 train_accuracy: 0.794 test_accuracy: 0.782\n",
            "[4,   500] train_loss: 0.942 train_accuracy: 0.780 test_accuracy: 0.775\n",
            "[4,   505] train_loss: 0.535 train_accuracy: 0.821 test_accuracy: 0.805\n",
            "[4,   510] train_loss: 0.383 train_accuracy: 0.846 test_accuracy: 0.825\n",
            "[4,   515] train_loss: 0.830 train_accuracy: 0.824 test_accuracy: 0.801\n",
            "[4,   520] train_loss: 0.254 train_accuracy: 0.824 test_accuracy: 0.799\n",
            "[4,   525] train_loss: 0.612 train_accuracy: 0.847 test_accuracy: 0.822\n",
            "[4,   530] train_loss: 0.348 train_accuracy: 0.861 test_accuracy: 0.834\n",
            "[4,   535] train_loss: 0.621 train_accuracy: 0.861 test_accuracy: 0.834\n",
            "[4,   540] train_loss: 0.594 train_accuracy: 0.862 test_accuracy: 0.833\n",
            "[4,   545] train_loss: 0.661 train_accuracy: 0.836 test_accuracy: 0.812\n",
            "[4,   550] train_loss: 0.550 train_accuracy: 0.838 test_accuracy: 0.832\n",
            "[4,   555] train_loss: 0.775 train_accuracy: 0.849 test_accuracy: 0.838\n",
            "[4,   560] train_loss: 0.484 train_accuracy: 0.831 test_accuracy: 0.815\n",
            "[4,   565] train_loss: 0.415 train_accuracy: 0.837 test_accuracy: 0.810\n",
            "[4,   570] train_loss: 0.273 train_accuracy: 0.868 test_accuracy: 0.845\n",
            "[4,   575] train_loss: 0.852 train_accuracy: 0.851 test_accuracy: 0.839\n",
            "[4,   580] train_loss: 0.337 train_accuracy: 0.859 test_accuracy: 0.845\n",
            "[4,   585] train_loss: 0.690 train_accuracy: 0.841 test_accuracy: 0.827\n",
            "[4,   590] train_loss: 0.192 train_accuracy: 0.822 test_accuracy: 0.811\n",
            "[4,   595] train_loss: 0.420 train_accuracy: 0.804 test_accuracy: 0.793\n",
            "[4,   600] train_loss: 1.123 train_accuracy: 0.814 test_accuracy: 0.807\n",
            "[4,   605] train_loss: 0.418 train_accuracy: 0.770 test_accuracy: 0.771\n",
            "[4,   610] train_loss: 0.903 train_accuracy: 0.811 test_accuracy: 0.801\n",
            "[4,   615] train_loss: 0.678 train_accuracy: 0.846 test_accuracy: 0.813\n",
            "[4,   620] train_loss: 0.304 train_accuracy: 0.840 test_accuracy: 0.800\n",
            "[4,   625] train_loss: 0.544 train_accuracy: 0.820 test_accuracy: 0.788\n",
            "[4,   630] train_loss: 0.505 train_accuracy: 0.832 test_accuracy: 0.797\n",
            "[4,   635] train_loss: 0.220 train_accuracy: 0.833 test_accuracy: 0.798\n",
            "[4,   640] train_loss: 0.640 train_accuracy: 0.849 test_accuracy: 0.823\n",
            "[4,   645] train_loss: 0.366 train_accuracy: 0.852 test_accuracy: 0.829\n",
            "[4,   650] train_loss: 0.739 train_accuracy: 0.874 test_accuracy: 0.841\n",
            "[4,   655] train_loss: 0.581 train_accuracy: 0.868 test_accuracy: 0.846\n",
            "[4,   660] train_loss: 0.465 train_accuracy: 0.859 test_accuracy: 0.845\n",
            "[4,   665] train_loss: 0.222 train_accuracy: 0.836 test_accuracy: 0.833\n",
            "[4,   670] train_loss: 0.925 train_accuracy: 0.815 test_accuracy: 0.813\n",
            "[4,   675] train_loss: 0.907 train_accuracy: 0.836 test_accuracy: 0.832\n",
            "[4,   680] train_loss: 0.826 train_accuracy: 0.827 test_accuracy: 0.813\n",
            "[4,   685] train_loss: 0.654 train_accuracy: 0.851 test_accuracy: 0.827\n",
            "[4,   690] train_loss: 0.554 train_accuracy: 0.863 test_accuracy: 0.836\n",
            "[4,   695] train_loss: 0.395 train_accuracy: 0.849 test_accuracy: 0.824\n",
            "[4,   700] train_loss: 1.061 train_accuracy: 0.849 test_accuracy: 0.834\n",
            "[4,   705] train_loss: 0.414 train_accuracy: 0.851 test_accuracy: 0.854\n",
            "[4,   710] train_loss: 0.367 train_accuracy: 0.851 test_accuracy: 0.856\n",
            "[4,   715] train_loss: 0.854 train_accuracy: 0.856 test_accuracy: 0.850\n",
            "[4,   720] train_loss: 0.478 train_accuracy: 0.849 test_accuracy: 0.834\n",
            "[4,   725] train_loss: 0.424 train_accuracy: 0.832 test_accuracy: 0.803\n",
            "[4,   730] train_loss: 0.245 train_accuracy: 0.830 test_accuracy: 0.795\n",
            "[4,   735] train_loss: 0.537 train_accuracy: 0.838 test_accuracy: 0.809\n",
            "[4,   740] train_loss: 0.383 train_accuracy: 0.836 test_accuracy: 0.810\n",
            "[4,   745] train_loss: 0.383 train_accuracy: 0.839 test_accuracy: 0.813\n",
            "[4,   750] train_loss: 0.219 train_accuracy: 0.834 test_accuracy: 0.818\n",
            "[4,   755] train_loss: 0.660 train_accuracy: 0.847 test_accuracy: 0.824\n",
            "[4,   760] train_loss: 0.674 train_accuracy: 0.871 test_accuracy: 0.839\n",
            "[4,   765] train_loss: 0.395 train_accuracy: 0.867 test_accuracy: 0.834\n",
            "[4,   770] train_loss: 0.316 train_accuracy: 0.849 test_accuracy: 0.816\n",
            "[4,   775] train_loss: 0.488 train_accuracy: 0.863 test_accuracy: 0.829\n",
            "[4,   780] train_loss: 0.572 train_accuracy: 0.872 test_accuracy: 0.846\n",
            "[4,   785] train_loss: 0.260 train_accuracy: 0.876 test_accuracy: 0.853\n",
            "[4,   790] train_loss: 0.196 train_accuracy: 0.876 test_accuracy: 0.858\n",
            "[4,   795] train_loss: 0.292 train_accuracy: 0.847 test_accuracy: 0.845\n",
            "[4,   800] train_loss: 0.090 train_accuracy: 0.822 test_accuracy: 0.825\n",
            "[4,   805] train_loss: 0.771 train_accuracy: 0.837 test_accuracy: 0.821\n",
            "[4,   810] train_loss: 0.159 train_accuracy: 0.825 test_accuracy: 0.805\n",
            "[4,   815] train_loss: 0.547 train_accuracy: 0.827 test_accuracy: 0.809\n",
            "[4,   820] train_loss: 0.634 train_accuracy: 0.845 test_accuracy: 0.833\n",
            "[4,   825] train_loss: 0.440 train_accuracy: 0.859 test_accuracy: 0.842\n",
            "[4,   830] train_loss: 0.239 train_accuracy: 0.859 test_accuracy: 0.847\n",
            "[4,   835] train_loss: 0.338 train_accuracy: 0.861 test_accuracy: 0.852\n",
            "[4,   840] train_loss: 0.532 train_accuracy: 0.867 test_accuracy: 0.855\n",
            "[4,   845] train_loss: 0.944 train_accuracy: 0.860 test_accuracy: 0.830\n",
            "[4,   850] train_loss: 0.319 train_accuracy: 0.854 test_accuracy: 0.820\n",
            "[4,   855] train_loss: 1.083 train_accuracy: 0.878 test_accuracy: 0.848\n",
            "[4,   860] train_loss: 0.537 train_accuracy: 0.851 test_accuracy: 0.823\n",
            "[4,   865] train_loss: 0.467 train_accuracy: 0.865 test_accuracy: 0.838\n",
            "[4,   870] train_loss: 0.432 train_accuracy: 0.880 test_accuracy: 0.858\n",
            "[4,   875] train_loss: 0.478 train_accuracy: 0.886 test_accuracy: 0.865\n",
            "[4,   880] train_loss: 0.174 train_accuracy: 0.881 test_accuracy: 0.862\n",
            "[4,   885] train_loss: 0.247 train_accuracy: 0.871 test_accuracy: 0.851\n",
            "[4,   890] train_loss: 0.375 train_accuracy: 0.863 test_accuracy: 0.843\n",
            "[4,   895] train_loss: 0.355 train_accuracy: 0.863 test_accuracy: 0.848\n",
            "[4,   900] train_loss: 0.922 train_accuracy: 0.876 test_accuracy: 0.866\n",
            "[4,   905] train_loss: 0.418 train_accuracy: 0.872 test_accuracy: 0.863\n",
            "[4,   910] train_loss: 0.860 train_accuracy: 0.868 test_accuracy: 0.861\n",
            "[4,   915] train_loss: 0.607 train_accuracy: 0.862 test_accuracy: 0.853\n",
            "[4,   920] train_loss: 0.381 train_accuracy: 0.844 test_accuracy: 0.829\n",
            "[4,   925] train_loss: 0.276 train_accuracy: 0.832 test_accuracy: 0.823\n",
            "[4,   930] train_loss: 0.159 train_accuracy: 0.831 test_accuracy: 0.818\n",
            "[4,   935] train_loss: 0.629 train_accuracy: 0.854 test_accuracy: 0.838\n",
            "[4,   940] train_loss: 0.591 train_accuracy: 0.868 test_accuracy: 0.837\n",
            "[4,   945] train_loss: 0.690 train_accuracy: 0.861 test_accuracy: 0.830\n",
            "[4,   950] train_loss: 0.489 train_accuracy: 0.872 test_accuracy: 0.841\n",
            "[4,   955] train_loss: 0.643 train_accuracy: 0.878 test_accuracy: 0.850\n",
            "[4,   960] train_loss: 0.375 train_accuracy: 0.876 test_accuracy: 0.847\n",
            "[4,   965] train_loss: 0.469 train_accuracy: 0.881 test_accuracy: 0.848\n",
            "[4,   970] train_loss: 0.317 train_accuracy: 0.868 test_accuracy: 0.831\n",
            "[4,   975] train_loss: 0.254 train_accuracy: 0.851 test_accuracy: 0.819\n",
            "[4,   980] train_loss: 0.872 train_accuracy: 0.843 test_accuracy: 0.821\n",
            "[4,   985] train_loss: 0.812 train_accuracy: 0.824 test_accuracy: 0.810\n",
            "[4,   990] train_loss: 1.111 train_accuracy: 0.846 test_accuracy: 0.838\n",
            "[4,   995] train_loss: 0.457 train_accuracy: 0.865 test_accuracy: 0.851\n",
            "[4,  1000] train_loss: 0.548 train_accuracy: 0.858 test_accuracy: 0.835\n",
            "[4,  1005] train_loss: 0.305 train_accuracy: 0.856 test_accuracy: 0.836\n",
            "[4,  1010] train_loss: 0.487 train_accuracy: 0.868 test_accuracy: 0.843\n",
            "[4,  1015] train_loss: 0.157 train_accuracy: 0.854 test_accuracy: 0.832\n",
            "[4,  1020] train_loss: 0.404 train_accuracy: 0.842 test_accuracy: 0.822\n",
            "[4,  1025] train_loss: 0.404 train_accuracy: 0.856 test_accuracy: 0.837\n",
            "[4,  1030] train_loss: 0.397 train_accuracy: 0.884 test_accuracy: 0.869\n",
            "[4,  1035] train_loss: 0.500 train_accuracy: 0.875 test_accuracy: 0.858\n",
            "[4,  1040] train_loss: 0.178 train_accuracy: 0.872 test_accuracy: 0.843\n",
            "[4,  1045] train_loss: 0.176 train_accuracy: 0.867 test_accuracy: 0.839\n",
            "[4,  1050] train_loss: 0.328 train_accuracy: 0.852 test_accuracy: 0.832\n",
            "[4,  1055] train_loss: 0.198 train_accuracy: 0.840 test_accuracy: 0.830\n",
            "[4,  1060] train_loss: 0.422 train_accuracy: 0.852 test_accuracy: 0.836\n",
            "[4,  1065] train_loss: 1.131 train_accuracy: 0.838 test_accuracy: 0.808\n",
            "[4,  1070] train_loss: 0.360 train_accuracy: 0.852 test_accuracy: 0.818\n",
            "[4,  1075] train_loss: 0.887 train_accuracy: 0.855 test_accuracy: 0.834\n",
            "[4,  1080] train_loss: 0.611 train_accuracy: 0.804 test_accuracy: 0.783\n",
            "[4,  1085] train_loss: 0.526 train_accuracy: 0.786 test_accuracy: 0.778\n",
            "[4,  1090] train_loss: 0.752 train_accuracy: 0.823 test_accuracy: 0.808\n",
            "[4,  1095] train_loss: 0.568 train_accuracy: 0.863 test_accuracy: 0.843\n",
            "[4,  1100] train_loss: 0.766 train_accuracy: 0.852 test_accuracy: 0.844\n",
            "[4,  1105] train_loss: 0.509 train_accuracy: 0.798 test_accuracy: 0.808\n",
            "[4,  1110] train_loss: 0.809 train_accuracy: 0.803 test_accuracy: 0.816\n",
            "[4,  1115] train_loss: 0.447 train_accuracy: 0.849 test_accuracy: 0.835\n",
            "[4,  1120] train_loss: 0.465 train_accuracy: 0.867 test_accuracy: 0.846\n",
            "[4,  1125] train_loss: 0.267 train_accuracy: 0.851 test_accuracy: 0.828\n",
            "[4,  1130] train_loss: 0.270 train_accuracy: 0.835 test_accuracy: 0.809\n",
            "[4,  1135] train_loss: 0.304 train_accuracy: 0.846 test_accuracy: 0.817\n",
            "[4,  1140] train_loss: 0.836 train_accuracy: 0.864 test_accuracy: 0.838\n",
            "[4,  1145] train_loss: 0.335 train_accuracy: 0.854 test_accuracy: 0.829\n",
            "[4,  1150] train_loss: 0.461 train_accuracy: 0.868 test_accuracy: 0.845\n",
            "[4,  1155] train_loss: 0.267 train_accuracy: 0.883 test_accuracy: 0.860\n",
            "[4,  1160] train_loss: 0.679 train_accuracy: 0.870 test_accuracy: 0.835\n",
            "[4,  1165] train_loss: 0.581 train_accuracy: 0.865 test_accuracy: 0.827\n",
            "[4,  1170] train_loss: 0.256 train_accuracy: 0.867 test_accuracy: 0.834\n",
            "[4,  1175] train_loss: 0.573 train_accuracy: 0.866 test_accuracy: 0.841\n",
            "[4,  1180] train_loss: 0.292 train_accuracy: 0.860 test_accuracy: 0.848\n",
            "[4,  1185] train_loss: 0.473 train_accuracy: 0.838 test_accuracy: 0.821\n",
            "[4,  1190] train_loss: 0.781 train_accuracy: 0.786 test_accuracy: 0.761\n",
            "[4,  1195] train_loss: 0.991 train_accuracy: 0.826 test_accuracy: 0.801\n",
            "[4,  1200] train_loss: 0.405 train_accuracy: 0.807 test_accuracy: 0.777\n",
            "[4,  1205] train_loss: 0.570 train_accuracy: 0.795 test_accuracy: 0.771\n",
            "[4,  1210] train_loss: 0.341 train_accuracy: 0.801 test_accuracy: 0.802\n",
            "[4,  1215] train_loss: 0.854 train_accuracy: 0.826 test_accuracy: 0.840\n",
            "[4,  1220] train_loss: 0.577 train_accuracy: 0.862 test_accuracy: 0.842\n",
            "[4,  1225] train_loss: 0.499 train_accuracy: 0.874 test_accuracy: 0.836\n",
            "[4,  1230] train_loss: 0.872 train_accuracy: 0.843 test_accuracy: 0.810\n",
            "[4,  1235] train_loss: 0.780 train_accuracy: 0.831 test_accuracy: 0.816\n",
            "[4,  1240] train_loss: 0.326 train_accuracy: 0.805 test_accuracy: 0.798\n",
            "[4,  1245] train_loss: 0.696 train_accuracy: 0.799 test_accuracy: 0.790\n",
            "[4,  1250] train_loss: 0.265 train_accuracy: 0.772 test_accuracy: 0.753\n",
            "[4,  1255] train_loss: 0.663 train_accuracy: 0.824 test_accuracy: 0.800\n",
            "[4,  1260] train_loss: 0.296 train_accuracy: 0.849 test_accuracy: 0.817\n",
            "[4,  1265] train_loss: 0.720 train_accuracy: 0.857 test_accuracy: 0.822\n",
            "[4,  1270] train_loss: 0.213 train_accuracy: 0.850 test_accuracy: 0.811\n",
            "[4,  1275] train_loss: 0.428 train_accuracy: 0.842 test_accuracy: 0.807\n",
            "[4,  1280] train_loss: 0.741 train_accuracy: 0.848 test_accuracy: 0.826\n",
            "[4,  1285] train_loss: 0.851 train_accuracy: 0.855 test_accuracy: 0.833\n",
            "[4,  1290] train_loss: 0.234 train_accuracy: 0.847 test_accuracy: 0.834\n",
            "[4,  1295] train_loss: 0.894 train_accuracy: 0.852 test_accuracy: 0.845\n",
            "[4,  1300] train_loss: 0.205 train_accuracy: 0.850 test_accuracy: 0.843\n",
            "[4,  1305] train_loss: 0.189 train_accuracy: 0.827 test_accuracy: 0.820\n",
            "[4,  1310] train_loss: 0.502 train_accuracy: 0.827 test_accuracy: 0.811\n",
            "[4,  1315] train_loss: 0.375 train_accuracy: 0.844 test_accuracy: 0.836\n",
            "[4,  1320] train_loss: 0.712 train_accuracy: 0.824 test_accuracy: 0.819\n",
            "[4,  1325] train_loss: 0.582 train_accuracy: 0.846 test_accuracy: 0.832\n",
            "[4,  1330] train_loss: 0.513 train_accuracy: 0.878 test_accuracy: 0.856\n",
            "[4,  1335] train_loss: 0.463 train_accuracy: 0.863 test_accuracy: 0.854\n",
            "[4,  1340] train_loss: 0.509 train_accuracy: 0.857 test_accuracy: 0.844\n",
            "[4,  1345] train_loss: 0.209 train_accuracy: 0.836 test_accuracy: 0.834\n",
            "[4,  1350] train_loss: 0.474 train_accuracy: 0.845 test_accuracy: 0.842\n",
            "[4,  1355] train_loss: 0.607 train_accuracy: 0.873 test_accuracy: 0.858\n",
            "[4,  1360] train_loss: 0.433 train_accuracy: 0.863 test_accuracy: 0.831\n",
            "[4,  1365] train_loss: 0.096 train_accuracy: 0.778 test_accuracy: 0.747\n",
            "[4,  1370] train_loss: 0.882 train_accuracy: 0.762 test_accuracy: 0.728\n",
            "[4,  1375] train_loss: 0.406 train_accuracy: 0.823 test_accuracy: 0.800\n",
            "[4,  1380] train_loss: 0.979 train_accuracy: 0.859 test_accuracy: 0.843\n",
            "[4,  1385] train_loss: 0.540 train_accuracy: 0.825 test_accuracy: 0.836\n",
            "[4,  1390] train_loss: 0.497 train_accuracy: 0.831 test_accuracy: 0.852\n",
            "[4,  1395] train_loss: 0.397 train_accuracy: 0.867 test_accuracy: 0.856\n",
            "[4,  1400] train_loss: 0.599 train_accuracy: 0.868 test_accuracy: 0.842\n",
            "[4,  1405] train_loss: 0.338 train_accuracy: 0.875 test_accuracy: 0.842\n",
            "[4,  1410] train_loss: 0.284 train_accuracy: 0.864 test_accuracy: 0.830\n",
            "[4,  1415] train_loss: 0.946 train_accuracy: 0.837 test_accuracy: 0.812\n",
            "[4,  1420] train_loss: 0.405 train_accuracy: 0.796 test_accuracy: 0.785\n",
            "[4,  1425] train_loss: 1.010 train_accuracy: 0.837 test_accuracy: 0.813\n",
            "[4,  1430] train_loss: 0.329 train_accuracy: 0.867 test_accuracy: 0.840\n",
            "[4,  1435] train_loss: 0.532 train_accuracy: 0.866 test_accuracy: 0.843\n",
            "[4,  1440] train_loss: 0.210 train_accuracy: 0.855 test_accuracy: 0.837\n",
            "[4,  1445] train_loss: 0.351 train_accuracy: 0.855 test_accuracy: 0.836\n",
            "[4,  1450] train_loss: 0.154 train_accuracy: 0.832 test_accuracy: 0.812\n",
            "[4,  1455] train_loss: 0.543 train_accuracy: 0.814 test_accuracy: 0.786\n",
            "[4,  1460] train_loss: 0.418 train_accuracy: 0.831 test_accuracy: 0.799\n",
            "[4,  1465] train_loss: 0.666 train_accuracy: 0.825 test_accuracy: 0.794\n",
            "[4,  1470] train_loss: 0.721 train_accuracy: 0.822 test_accuracy: 0.792\n",
            "[4,  1475] train_loss: 0.556 train_accuracy: 0.822 test_accuracy: 0.805\n",
            "[4,  1480] train_loss: 0.368 train_accuracy: 0.830 test_accuracy: 0.825\n",
            "[4,  1485] train_loss: 0.419 train_accuracy: 0.854 test_accuracy: 0.842\n",
            "[4,  1490] train_loss: 0.388 train_accuracy: 0.875 test_accuracy: 0.862\n",
            "[4,  1495] train_loss: 0.214 train_accuracy: 0.871 test_accuracy: 0.868\n",
            "[4,  1500] train_loss: 0.727 train_accuracy: 0.877 test_accuracy: 0.867\n",
            "[4,  1505] train_loss: 0.398 train_accuracy: 0.871 test_accuracy: 0.852\n",
            "[4,  1510] train_loss: 0.655 train_accuracy: 0.833 test_accuracy: 0.815\n",
            "[4,  1515] train_loss: 0.742 train_accuracy: 0.869 test_accuracy: 0.838\n",
            "[4,  1520] train_loss: 0.522 train_accuracy: 0.853 test_accuracy: 0.822\n",
            "[4,  1525] train_loss: 0.727 train_accuracy: 0.851 test_accuracy: 0.821\n",
            "[4,  1530] train_loss: 0.499 train_accuracy: 0.824 test_accuracy: 0.794\n",
            "[4,  1535] train_loss: 0.392 train_accuracy: 0.850 test_accuracy: 0.824\n",
            "[4,  1540] train_loss: 0.234 train_accuracy: 0.860 test_accuracy: 0.837\n",
            "[4,  1545] train_loss: 1.224 train_accuracy: 0.845 test_accuracy: 0.830\n",
            "[4,  1550] train_loss: 0.649 train_accuracy: 0.778 test_accuracy: 0.785\n",
            "[4,  1555] train_loss: 0.452 train_accuracy: 0.760 test_accuracy: 0.778\n",
            "[4,  1560] train_loss: 0.770 train_accuracy: 0.868 test_accuracy: 0.855\n",
            "[4,  1565] train_loss: 0.589 train_accuracy: 0.880 test_accuracy: 0.866\n",
            "[4,  1570] train_loss: 0.355 train_accuracy: 0.867 test_accuracy: 0.849\n",
            "[4,  1575] train_loss: 0.872 train_accuracy: 0.816 test_accuracy: 0.814\n",
            "[4,  1580] train_loss: 0.728 train_accuracy: 0.838 test_accuracy: 0.818\n",
            "[4,  1585] train_loss: 0.254 train_accuracy: 0.793 test_accuracy: 0.768\n",
            "[4,  1590] train_loss: 0.336 train_accuracy: 0.848 test_accuracy: 0.836\n",
            "[4,  1595] train_loss: 0.738 train_accuracy: 0.848 test_accuracy: 0.849\n",
            "[4,  1600] train_loss: 0.377 train_accuracy: 0.842 test_accuracy: 0.843\n",
            "[4,  1605] train_loss: 0.705 train_accuracy: 0.859 test_accuracy: 0.849\n",
            "[4,  1610] train_loss: 0.808 train_accuracy: 0.867 test_accuracy: 0.847\n",
            "[4,  1615] train_loss: 0.510 train_accuracy: 0.859 test_accuracy: 0.829\n",
            "[4,  1620] train_loss: 0.259 train_accuracy: 0.852 test_accuracy: 0.818\n",
            "[4,  1625] train_loss: 0.430 train_accuracy: 0.866 test_accuracy: 0.829\n",
            "[4,  1630] train_loss: 0.092 train_accuracy: 0.876 test_accuracy: 0.846\n",
            "[4,  1635] train_loss: 0.622 train_accuracy: 0.868 test_accuracy: 0.841\n",
            "[4,  1640] train_loss: 0.642 train_accuracy: 0.823 test_accuracy: 0.799\n",
            "[4,  1645] train_loss: 0.569 train_accuracy: 0.838 test_accuracy: 0.824\n",
            "[4,  1650] train_loss: 0.620 train_accuracy: 0.837 test_accuracy: 0.836\n",
            "[4,  1655] train_loss: 0.426 train_accuracy: 0.843 test_accuracy: 0.841\n",
            "[4,  1660] train_loss: 0.844 train_accuracy: 0.841 test_accuracy: 0.825\n",
            "[4,  1665] train_loss: 0.734 train_accuracy: 0.832 test_accuracy: 0.806\n",
            "[4,  1670] train_loss: 0.915 train_accuracy: 0.832 test_accuracy: 0.806\n",
            "[4,  1675] train_loss: 0.345 train_accuracy: 0.833 test_accuracy: 0.831\n",
            "[4,  1680] train_loss: 0.683 train_accuracy: 0.836 test_accuracy: 0.847\n",
            "[4,  1685] train_loss: 0.379 train_accuracy: 0.855 test_accuracy: 0.854\n",
            "[4,  1690] train_loss: 0.487 train_accuracy: 0.826 test_accuracy: 0.835\n",
            "[4,  1695] train_loss: 0.385 train_accuracy: 0.836 test_accuracy: 0.827\n",
            "[4,  1700] train_loss: 0.400 train_accuracy: 0.864 test_accuracy: 0.843\n",
            "[4,  1705] train_loss: 0.481 train_accuracy: 0.867 test_accuracy: 0.830\n",
            "[4,  1710] train_loss: 0.785 train_accuracy: 0.847 test_accuracy: 0.814\n",
            "[4,  1715] train_loss: 0.317 train_accuracy: 0.827 test_accuracy: 0.798\n",
            "[4,  1720] train_loss: 0.493 train_accuracy: 0.813 test_accuracy: 0.785\n",
            "[4,  1725] train_loss: 0.590 train_accuracy: 0.737 test_accuracy: 0.701\n",
            "[5,     5] train_loss: 0.773 train_accuracy: 0.846 test_accuracy: 0.832\n",
            "[5,    10] train_loss: 0.248 train_accuracy: 0.846 test_accuracy: 0.831\n",
            "[5,    15] train_loss: 0.392 train_accuracy: 0.826 test_accuracy: 0.809\n",
            "[5,    20] train_loss: 0.421 train_accuracy: 0.857 test_accuracy: 0.841\n",
            "[5,    25] train_loss: 0.115 train_accuracy: 0.855 test_accuracy: 0.836\n",
            "[5,    30] train_loss: 0.559 train_accuracy: 0.856 test_accuracy: 0.842\n",
            "[5,    35] train_loss: 0.465 train_accuracy: 0.865 test_accuracy: 0.840\n",
            "[5,    40] train_loss: 0.533 train_accuracy: 0.852 test_accuracy: 0.822\n",
            "[5,    45] train_loss: 0.222 train_accuracy: 0.859 test_accuracy: 0.825\n",
            "[5,    50] train_loss: 0.478 train_accuracy: 0.848 test_accuracy: 0.827\n",
            "[5,    55] train_loss: 0.283 train_accuracy: 0.850 test_accuracy: 0.827\n",
            "[5,    60] train_loss: 0.466 train_accuracy: 0.870 test_accuracy: 0.860\n",
            "[5,    65] train_loss: 0.536 train_accuracy: 0.865 test_accuracy: 0.852\n",
            "[5,    70] train_loss: 0.474 train_accuracy: 0.871 test_accuracy: 0.847\n",
            "[5,    75] train_loss: 0.518 train_accuracy: 0.844 test_accuracy: 0.813\n",
            "[5,    80] train_loss: 0.568 train_accuracy: 0.802 test_accuracy: 0.761\n",
            "[5,    85] train_loss: 1.155 train_accuracy: 0.700 test_accuracy: 0.655\n",
            "[5,    90] train_loss: 0.446 train_accuracy: 0.784 test_accuracy: 0.747\n",
            "[5,    95] train_loss: 1.069 train_accuracy: 0.857 test_accuracy: 0.823\n",
            "[5,   100] train_loss: 0.662 train_accuracy: 0.832 test_accuracy: 0.808\n",
            "[5,   105] train_loss: 0.957 train_accuracy: 0.837 test_accuracy: 0.820\n",
            "[5,   110] train_loss: 0.517 train_accuracy: 0.798 test_accuracy: 0.778\n",
            "[5,   115] train_loss: 0.980 train_accuracy: 0.821 test_accuracy: 0.802\n",
            "[5,   120] train_loss: 0.389 train_accuracy: 0.752 test_accuracy: 0.729\n",
            "[5,   125] train_loss: 0.635 train_accuracy: 0.840 test_accuracy: 0.808\n",
            "[5,   130] train_loss: 0.805 train_accuracy: 0.851 test_accuracy: 0.829\n",
            "[5,   135] train_loss: 0.617 train_accuracy: 0.823 test_accuracy: 0.807\n",
            "[5,   140] train_loss: 0.423 train_accuracy: 0.822 test_accuracy: 0.807\n",
            "[5,   145] train_loss: 0.501 train_accuracy: 0.811 test_accuracy: 0.792\n",
            "[5,   150] train_loss: 0.489 train_accuracy: 0.848 test_accuracy: 0.821\n",
            "[5,   155] train_loss: 0.630 train_accuracy: 0.860 test_accuracy: 0.829\n",
            "[5,   160] train_loss: 0.246 train_accuracy: 0.862 test_accuracy: 0.831\n",
            "[5,   165] train_loss: 0.541 train_accuracy: 0.875 test_accuracy: 0.840\n",
            "[5,   170] train_loss: 0.235 train_accuracy: 0.865 test_accuracy: 0.829\n",
            "[5,   175] train_loss: 0.334 train_accuracy: 0.839 test_accuracy: 0.801\n",
            "[5,   180] train_loss: 0.386 train_accuracy: 0.850 test_accuracy: 0.817\n",
            "[5,   185] train_loss: 0.537 train_accuracy: 0.862 test_accuracy: 0.836\n",
            "[5,   190] train_loss: 0.387 train_accuracy: 0.883 test_accuracy: 0.861\n",
            "[5,   195] train_loss: 0.128 train_accuracy: 0.879 test_accuracy: 0.852\n",
            "[5,   200] train_loss: 0.314 train_accuracy: 0.875 test_accuracy: 0.848\n",
            "[5,   205] train_loss: 0.479 train_accuracy: 0.848 test_accuracy: 0.818\n",
            "[5,   210] train_loss: 0.560 train_accuracy: 0.838 test_accuracy: 0.816\n",
            "[5,   215] train_loss: 0.577 train_accuracy: 0.839 test_accuracy: 0.825\n",
            "[5,   220] train_loss: 0.753 train_accuracy: 0.818 test_accuracy: 0.831\n",
            "[5,   225] train_loss: 0.670 train_accuracy: 0.816 test_accuracy: 0.847\n",
            "[5,   230] train_loss: 0.464 train_accuracy: 0.845 test_accuracy: 0.858\n",
            "[5,   235] train_loss: 0.452 train_accuracy: 0.821 test_accuracy: 0.825\n",
            "[5,   240] train_loss: 0.811 train_accuracy: 0.857 test_accuracy: 0.837\n",
            "[5,   245] train_loss: 0.294 train_accuracy: 0.841 test_accuracy: 0.811\n",
            "[5,   250] train_loss: 0.327 train_accuracy: 0.834 test_accuracy: 0.802\n",
            "[5,   255] train_loss: 0.673 train_accuracy: 0.835 test_accuracy: 0.808\n",
            "[5,   260] train_loss: 0.530 train_accuracy: 0.842 test_accuracy: 0.814\n",
            "[5,   265] train_loss: 0.329 train_accuracy: 0.850 test_accuracy: 0.825\n",
            "[5,   270] train_loss: 0.619 train_accuracy: 0.854 test_accuracy: 0.841\n",
            "[5,   275] train_loss: 0.348 train_accuracy: 0.848 test_accuracy: 0.847\n",
            "[5,   280] train_loss: 0.530 train_accuracy: 0.865 test_accuracy: 0.859\n",
            "[5,   285] train_loss: 0.362 train_accuracy: 0.884 test_accuracy: 0.862\n",
            "[5,   290] train_loss: 0.285 train_accuracy: 0.886 test_accuracy: 0.857\n",
            "[5,   295] train_loss: 0.152 train_accuracy: 0.881 test_accuracy: 0.850\n",
            "[5,   300] train_loss: 0.329 train_accuracy: 0.876 test_accuracy: 0.843\n",
            "[5,   305] train_loss: 0.500 train_accuracy: 0.871 test_accuracy: 0.837\n",
            "[5,   310] train_loss: 0.171 train_accuracy: 0.867 test_accuracy: 0.831\n",
            "[5,   315] train_loss: 0.868 train_accuracy: 0.885 test_accuracy: 0.854\n",
            "[5,   320] train_loss: 0.548 train_accuracy: 0.886 test_accuracy: 0.854\n",
            "[5,   325] train_loss: 0.315 train_accuracy: 0.875 test_accuracy: 0.853\n",
            "[5,   330] train_loss: 0.321 train_accuracy: 0.869 test_accuracy: 0.851\n",
            "[5,   335] train_loss: 0.652 train_accuracy: 0.867 test_accuracy: 0.849\n",
            "[5,   340] train_loss: 0.194 train_accuracy: 0.835 test_accuracy: 0.829\n",
            "[5,   345] train_loss: 0.370 train_accuracy: 0.856 test_accuracy: 0.839\n",
            "[5,   350] train_loss: 0.726 train_accuracy: 0.859 test_accuracy: 0.835\n",
            "[5,   355] train_loss: 0.439 train_accuracy: 0.872 test_accuracy: 0.845\n",
            "[5,   360] train_loss: 0.141 train_accuracy: 0.883 test_accuracy: 0.861\n",
            "[5,   365] train_loss: 0.651 train_accuracy: 0.892 test_accuracy: 0.867\n",
            "[5,   370] train_loss: 0.362 train_accuracy: 0.867 test_accuracy: 0.841\n",
            "[5,   375] train_loss: 0.684 train_accuracy: 0.852 test_accuracy: 0.828\n",
            "[5,   380] train_loss: 0.357 train_accuracy: 0.863 test_accuracy: 0.829\n",
            "[5,   385] train_loss: 0.641 train_accuracy: 0.871 test_accuracy: 0.838\n",
            "[5,   390] train_loss: 0.325 train_accuracy: 0.845 test_accuracy: 0.815\n",
            "[5,   395] train_loss: 0.375 train_accuracy: 0.837 test_accuracy: 0.804\n",
            "[5,   400] train_loss: 0.550 train_accuracy: 0.846 test_accuracy: 0.816\n",
            "[5,   405] train_loss: 0.547 train_accuracy: 0.837 test_accuracy: 0.812\n",
            "[5,   410] train_loss: 0.282 train_accuracy: 0.848 test_accuracy: 0.818\n",
            "[5,   415] train_loss: 0.620 train_accuracy: 0.868 test_accuracy: 0.835\n",
            "[5,   420] train_loss: 0.688 train_accuracy: 0.887 test_accuracy: 0.853\n",
            "[5,   425] train_loss: 0.131 train_accuracy: 0.893 test_accuracy: 0.865\n",
            "[5,   430] train_loss: 0.158 train_accuracy: 0.889 test_accuracy: 0.864\n",
            "[5,   435] train_loss: 0.474 train_accuracy: 0.883 test_accuracy: 0.865\n",
            "[5,   440] train_loss: 0.380 train_accuracy: 0.864 test_accuracy: 0.852\n",
            "[5,   445] train_loss: 0.527 train_accuracy: 0.871 test_accuracy: 0.853\n",
            "[5,   450] train_loss: 0.285 train_accuracy: 0.869 test_accuracy: 0.846\n",
            "[5,   455] train_loss: 0.332 train_accuracy: 0.881 test_accuracy: 0.855\n",
            "[5,   460] train_loss: 0.428 train_accuracy: 0.880 test_accuracy: 0.851\n",
            "[5,   465] train_loss: 0.581 train_accuracy: 0.866 test_accuracy: 0.828\n",
            "[5,   470] train_loss: 0.932 train_accuracy: 0.854 test_accuracy: 0.816\n",
            "[5,   475] train_loss: 0.319 train_accuracy: 0.837 test_accuracy: 0.805\n",
            "[5,   480] train_loss: 0.838 train_accuracy: 0.847 test_accuracy: 0.809\n",
            "[5,   485] train_loss: 0.811 train_accuracy: 0.854 test_accuracy: 0.819\n",
            "[5,   490] train_loss: 0.692 train_accuracy: 0.855 test_accuracy: 0.825\n",
            "[5,   495] train_loss: 0.354 train_accuracy: 0.852 test_accuracy: 0.821\n",
            "[5,   500] train_loss: 0.789 train_accuracy: 0.873 test_accuracy: 0.843\n",
            "[5,   505] train_loss: 0.366 train_accuracy: 0.875 test_accuracy: 0.846\n",
            "[5,   510] train_loss: 0.286 train_accuracy: 0.852 test_accuracy: 0.839\n",
            "[5,   515] train_loss: 1.131 train_accuracy: 0.845 test_accuracy: 0.844\n",
            "[5,   520] train_loss: 0.360 train_accuracy: 0.854 test_accuracy: 0.852\n",
            "[5,   525] train_loss: 0.172 train_accuracy: 0.867 test_accuracy: 0.862\n",
            "[5,   530] train_loss: 0.361 train_accuracy: 0.876 test_accuracy: 0.866\n",
            "[5,   535] train_loss: 0.264 train_accuracy: 0.885 test_accuracy: 0.866\n",
            "[5,   540] train_loss: 0.259 train_accuracy: 0.885 test_accuracy: 0.862\n",
            "[5,   545] train_loss: 0.522 train_accuracy: 0.885 test_accuracy: 0.853\n",
            "[5,   550] train_loss: 0.147 train_accuracy: 0.871 test_accuracy: 0.840\n",
            "[5,   555] train_loss: 0.545 train_accuracy: 0.857 test_accuracy: 0.821\n",
            "[5,   560] train_loss: 0.447 train_accuracy: 0.851 test_accuracy: 0.809\n",
            "[5,   565] train_loss: 0.847 train_accuracy: 0.836 test_accuracy: 0.795\n",
            "[5,   570] train_loss: 0.617 train_accuracy: 0.866 test_accuracy: 0.835\n",
            "[5,   575] train_loss: 0.763 train_accuracy: 0.861 test_accuracy: 0.833\n",
            "[5,   580] train_loss: 0.368 train_accuracy: 0.878 test_accuracy: 0.852\n",
            "[5,   585] train_loss: 0.445 train_accuracy: 0.893 test_accuracy: 0.867\n",
            "[5,   590] train_loss: 0.377 train_accuracy: 0.903 test_accuracy: 0.879\n",
            "[5,   595] train_loss: 0.522 train_accuracy: 0.901 test_accuracy: 0.880\n",
            "[5,   600] train_loss: 0.367 train_accuracy: 0.887 test_accuracy: 0.870\n",
            "[5,   605] train_loss: 0.227 train_accuracy: 0.873 test_accuracy: 0.857\n",
            "[5,   610] train_loss: 0.347 train_accuracy: 0.874 test_accuracy: 0.861\n",
            "[5,   615] train_loss: 0.416 train_accuracy: 0.878 test_accuracy: 0.860\n",
            "[5,   620] train_loss: 0.555 train_accuracy: 0.890 test_accuracy: 0.867\n",
            "[5,   625] train_loss: 0.558 train_accuracy: 0.885 test_accuracy: 0.865\n",
            "[5,   630] train_loss: 0.176 train_accuracy: 0.881 test_accuracy: 0.857\n",
            "[5,   635] train_loss: 0.427 train_accuracy: 0.885 test_accuracy: 0.861\n",
            "[5,   640] train_loss: 0.292 train_accuracy: 0.883 test_accuracy: 0.864\n",
            "[5,   645] train_loss: 0.228 train_accuracy: 0.865 test_accuracy: 0.849\n",
            "[5,   650] train_loss: 0.562 train_accuracy: 0.862 test_accuracy: 0.846\n",
            "[5,   655] train_loss: 0.561 train_accuracy: 0.885 test_accuracy: 0.855\n",
            "[5,   660] train_loss: 0.505 train_accuracy: 0.888 test_accuracy: 0.854\n",
            "[5,   665] train_loss: 0.455 train_accuracy: 0.889 test_accuracy: 0.853\n",
            "[5,   670] train_loss: 0.197 train_accuracy: 0.892 test_accuracy: 0.861\n",
            "[5,   675] train_loss: 0.781 train_accuracy: 0.896 test_accuracy: 0.867\n",
            "[5,   680] train_loss: 0.464 train_accuracy: 0.880 test_accuracy: 0.858\n",
            "[5,   685] train_loss: 0.611 train_accuracy: 0.869 test_accuracy: 0.854\n",
            "[5,   690] train_loss: 0.332 train_accuracy: 0.858 test_accuracy: 0.845\n",
            "[5,   695] train_loss: 0.429 train_accuracy: 0.851 test_accuracy: 0.833\n",
            "[5,   700] train_loss: 0.909 train_accuracy: 0.842 test_accuracy: 0.825\n",
            "[5,   705] train_loss: 0.404 train_accuracy: 0.846 test_accuracy: 0.831\n",
            "[5,   710] train_loss: 0.289 train_accuracy: 0.866 test_accuracy: 0.841\n",
            "[5,   715] train_loss: 0.590 train_accuracy: 0.886 test_accuracy: 0.853\n",
            "[5,   720] train_loss: 0.691 train_accuracy: 0.884 test_accuracy: 0.854\n",
            "[5,   725] train_loss: 0.879 train_accuracy: 0.875 test_accuracy: 0.844\n",
            "[5,   730] train_loss: 0.709 train_accuracy: 0.872 test_accuracy: 0.839\n",
            "[5,   735] train_loss: 0.601 train_accuracy: 0.875 test_accuracy: 0.852\n",
            "[5,   740] train_loss: 0.487 train_accuracy: 0.878 test_accuracy: 0.861\n",
            "[5,   745] train_loss: 0.651 train_accuracy: 0.884 test_accuracy: 0.869\n",
            "[5,   750] train_loss: 0.314 train_accuracy: 0.888 test_accuracy: 0.872\n",
            "[5,   755] train_loss: 0.159 train_accuracy: 0.889 test_accuracy: 0.868\n",
            "[5,   760] train_loss: 0.213 train_accuracy: 0.888 test_accuracy: 0.865\n",
            "[5,   765] train_loss: 0.281 train_accuracy: 0.886 test_accuracy: 0.864\n",
            "[5,   770] train_loss: 0.686 train_accuracy: 0.885 test_accuracy: 0.864\n",
            "[5,   775] train_loss: 0.426 train_accuracy: 0.891 test_accuracy: 0.865\n",
            "[5,   780] train_loss: 0.241 train_accuracy: 0.869 test_accuracy: 0.837\n",
            "[5,   785] train_loss: 0.374 train_accuracy: 0.867 test_accuracy: 0.834\n",
            "[5,   790] train_loss: 0.266 train_accuracy: 0.870 test_accuracy: 0.835\n",
            "[5,   795] train_loss: 0.257 train_accuracy: 0.867 test_accuracy: 0.836\n",
            "[5,   800] train_loss: 0.880 train_accuracy: 0.874 test_accuracy: 0.842\n",
            "[5,   805] train_loss: 0.141 train_accuracy: 0.866 test_accuracy: 0.835\n",
            "[5,   810] train_loss: 0.473 train_accuracy: 0.870 test_accuracy: 0.838\n",
            "[5,   815] train_loss: 0.678 train_accuracy: 0.872 test_accuracy: 0.844\n",
            "[5,   820] train_loss: 0.271 train_accuracy: 0.857 test_accuracy: 0.826\n",
            "[5,   825] train_loss: 0.498 train_accuracy: 0.853 test_accuracy: 0.829\n",
            "[5,   830] train_loss: 0.618 train_accuracy: 0.863 test_accuracy: 0.839\n",
            "[5,   835] train_loss: 0.807 train_accuracy: 0.857 test_accuracy: 0.830\n",
            "[5,   840] train_loss: 0.148 train_accuracy: 0.855 test_accuracy: 0.827\n",
            "[5,   845] train_loss: 0.325 train_accuracy: 0.865 test_accuracy: 0.836\n",
            "[5,   850] train_loss: 0.411 train_accuracy: 0.879 test_accuracy: 0.849\n",
            "[5,   855] train_loss: 0.557 train_accuracy: 0.884 test_accuracy: 0.849\n",
            "[5,   860] train_loss: 0.216 train_accuracy: 0.892 test_accuracy: 0.859\n",
            "[5,   865] train_loss: 0.681 train_accuracy: 0.891 test_accuracy: 0.857\n",
            "[5,   870] train_loss: 0.490 train_accuracy: 0.870 test_accuracy: 0.839\n",
            "[5,   875] train_loss: 0.583 train_accuracy: 0.860 test_accuracy: 0.835\n",
            "[5,   880] train_loss: 0.612 train_accuracy: 0.859 test_accuracy: 0.836\n",
            "[5,   885] train_loss: 0.260 train_accuracy: 0.869 test_accuracy: 0.847\n",
            "[5,   890] train_loss: 0.416 train_accuracy: 0.874 test_accuracy: 0.851\n",
            "[5,   895] train_loss: 0.286 train_accuracy: 0.878 test_accuracy: 0.851\n",
            "[5,   900] train_loss: 0.564 train_accuracy: 0.864 test_accuracy: 0.837\n",
            "[5,   905] train_loss: 0.555 train_accuracy: 0.857 test_accuracy: 0.831\n",
            "[5,   910] train_loss: 0.313 train_accuracy: 0.875 test_accuracy: 0.851\n",
            "[5,   915] train_loss: 0.234 train_accuracy: 0.878 test_accuracy: 0.856\n",
            "[5,   920] train_loss: 0.576 train_accuracy: 0.878 test_accuracy: 0.854\n",
            "[5,   925] train_loss: 0.974 train_accuracy: 0.880 test_accuracy: 0.854\n",
            "[5,   930] train_loss: 0.593 train_accuracy: 0.870 test_accuracy: 0.845\n",
            "[5,   935] train_loss: 0.751 train_accuracy: 0.865 test_accuracy: 0.845\n",
            "[5,   940] train_loss: 0.513 train_accuracy: 0.859 test_accuracy: 0.842\n",
            "[5,   945] train_loss: 0.284 train_accuracy: 0.851 test_accuracy: 0.841\n",
            "[5,   950] train_loss: 0.265 train_accuracy: 0.853 test_accuracy: 0.837\n",
            "[5,   955] train_loss: 0.407 train_accuracy: 0.860 test_accuracy: 0.837\n",
            "[5,   960] train_loss: 0.567 train_accuracy: 0.872 test_accuracy: 0.848\n",
            "[5,   965] train_loss: 0.488 train_accuracy: 0.875 test_accuracy: 0.850\n",
            "[5,   970] train_loss: 0.365 train_accuracy: 0.875 test_accuracy: 0.848\n",
            "[5,   975] train_loss: 0.775 train_accuracy: 0.874 test_accuracy: 0.837\n",
            "[5,   980] train_loss: 0.221 train_accuracy: 0.859 test_accuracy: 0.814\n",
            "[5,   985] train_loss: 0.642 train_accuracy: 0.852 test_accuracy: 0.812\n",
            "[5,   990] train_loss: 0.517 train_accuracy: 0.867 test_accuracy: 0.843\n",
            "[5,   995] train_loss: 0.506 train_accuracy: 0.864 test_accuracy: 0.858\n",
            "[5,  1000] train_loss: 0.414 train_accuracy: 0.850 test_accuracy: 0.849\n",
            "[5,  1005] train_loss: 0.355 train_accuracy: 0.856 test_accuracy: 0.849\n",
            "[5,  1010] train_loss: 0.843 train_accuracy: 0.870 test_accuracy: 0.861\n",
            "[5,  1015] train_loss: 1.011 train_accuracy: 0.855 test_accuracy: 0.851\n",
            "[5,  1020] train_loss: 0.359 train_accuracy: 0.832 test_accuracy: 0.800\n",
            "[5,  1025] train_loss: 0.311 train_accuracy: 0.834 test_accuracy: 0.802\n",
            "[5,  1030] train_loss: 0.246 train_accuracy: 0.858 test_accuracy: 0.824\n",
            "[5,  1035] train_loss: 0.535 train_accuracy: 0.875 test_accuracy: 0.841\n",
            "[5,  1040] train_loss: 0.351 train_accuracy: 0.866 test_accuracy: 0.835\n",
            "[5,  1045] train_loss: 0.837 train_accuracy: 0.870 test_accuracy: 0.836\n",
            "[5,  1050] train_loss: 0.492 train_accuracy: 0.841 test_accuracy: 0.824\n",
            "[5,  1055] train_loss: 0.489 train_accuracy: 0.831 test_accuracy: 0.822\n",
            "[5,  1060] train_loss: 1.040 train_accuracy: 0.840 test_accuracy: 0.830\n",
            "[5,  1065] train_loss: 0.383 train_accuracy: 0.826 test_accuracy: 0.829\n",
            "[5,  1070] train_loss: 0.957 train_accuracy: 0.863 test_accuracy: 0.836\n",
            "[5,  1075] train_loss: 0.868 train_accuracy: 0.836 test_accuracy: 0.792\n",
            "[5,  1080] train_loss: 0.251 train_accuracy: 0.835 test_accuracy: 0.793\n",
            "[5,  1085] train_loss: 0.495 train_accuracy: 0.859 test_accuracy: 0.829\n",
            "[5,  1090] train_loss: 0.586 train_accuracy: 0.865 test_accuracy: 0.840\n",
            "[5,  1095] train_loss: 0.593 train_accuracy: 0.843 test_accuracy: 0.831\n",
            "[5,  1100] train_loss: 0.749 train_accuracy: 0.818 test_accuracy: 0.806\n",
            "[5,  1105] train_loss: 0.323 train_accuracy: 0.859 test_accuracy: 0.823\n",
            "[5,  1110] train_loss: 0.144 train_accuracy: 0.860 test_accuracy: 0.823\n",
            "[5,  1115] train_loss: 0.280 train_accuracy: 0.859 test_accuracy: 0.823\n",
            "[5,  1120] train_loss: 0.331 train_accuracy: 0.836 test_accuracy: 0.819\n",
            "[5,  1125] train_loss: 0.232 train_accuracy: 0.788 test_accuracy: 0.788\n",
            "[5,  1130] train_loss: 0.487 train_accuracy: 0.778 test_accuracy: 0.778\n",
            "[5,  1135] train_loss: 0.600 train_accuracy: 0.824 test_accuracy: 0.809\n",
            "[5,  1140] train_loss: 0.471 train_accuracy: 0.854 test_accuracy: 0.832\n",
            "[5,  1145] train_loss: 0.763 train_accuracy: 0.858 test_accuracy: 0.827\n",
            "[5,  1150] train_loss: 0.326 train_accuracy: 0.838 test_accuracy: 0.808\n",
            "[5,  1155] train_loss: 0.626 train_accuracy: 0.839 test_accuracy: 0.804\n",
            "[5,  1160] train_loss: 0.283 train_accuracy: 0.856 test_accuracy: 0.830\n",
            "[5,  1165] train_loss: 0.466 train_accuracy: 0.858 test_accuracy: 0.831\n",
            "[5,  1170] train_loss: 0.685 train_accuracy: 0.881 test_accuracy: 0.847\n",
            "[5,  1175] train_loss: 0.477 train_accuracy: 0.871 test_accuracy: 0.838\n",
            "[5,  1180] train_loss: 0.776 train_accuracy: 0.858 test_accuracy: 0.821\n",
            "[5,  1185] train_loss: 0.289 train_accuracy: 0.828 test_accuracy: 0.782\n",
            "[5,  1190] train_loss: 0.515 train_accuracy: 0.842 test_accuracy: 0.805\n",
            "[5,  1195] train_loss: 0.119 train_accuracy: 0.827 test_accuracy: 0.805\n",
            "[5,  1200] train_loss: 0.495 train_accuracy: 0.810 test_accuracy: 0.792\n",
            "[5,  1205] train_loss: 0.403 train_accuracy: 0.835 test_accuracy: 0.800\n",
            "[5,  1210] train_loss: 0.203 train_accuracy: 0.853 test_accuracy: 0.805\n",
            "[5,  1215] train_loss: 0.383 train_accuracy: 0.865 test_accuracy: 0.819\n",
            "[5,  1220] train_loss: 0.239 train_accuracy: 0.866 test_accuracy: 0.828\n",
            "[5,  1225] train_loss: 0.684 train_accuracy: 0.866 test_accuracy: 0.847\n",
            "[5,  1230] train_loss: 0.631 train_accuracy: 0.847 test_accuracy: 0.838\n",
            "[5,  1235] train_loss: 0.658 train_accuracy: 0.834 test_accuracy: 0.825\n",
            "[5,  1240] train_loss: 0.176 train_accuracy: 0.834 test_accuracy: 0.823\n",
            "[5,  1245] train_loss: 0.411 train_accuracy: 0.846 test_accuracy: 0.818\n",
            "[5,  1250] train_loss: 0.401 train_accuracy: 0.834 test_accuracy: 0.807\n",
            "[5,  1255] train_loss: 0.956 train_accuracy: 0.862 test_accuracy: 0.849\n",
            "[5,  1260] train_loss: 0.298 train_accuracy: 0.834 test_accuracy: 0.841\n",
            "[5,  1265] train_loss: 0.362 train_accuracy: 0.846 test_accuracy: 0.838\n",
            "[5,  1270] train_loss: 0.388 train_accuracy: 0.839 test_accuracy: 0.813\n",
            "[5,  1275] train_loss: 0.773 train_accuracy: 0.844 test_accuracy: 0.801\n",
            "[5,  1280] train_loss: 0.690 train_accuracy: 0.872 test_accuracy: 0.837\n",
            "[5,  1285] train_loss: 0.210 train_accuracy: 0.860 test_accuracy: 0.831\n",
            "[5,  1290] train_loss: 0.474 train_accuracy: 0.854 test_accuracy: 0.830\n",
            "[5,  1295] train_loss: 0.263 train_accuracy: 0.863 test_accuracy: 0.840\n",
            "[5,  1300] train_loss: 0.472 train_accuracy: 0.851 test_accuracy: 0.838\n",
            "[5,  1305] train_loss: 0.478 train_accuracy: 0.847 test_accuracy: 0.817\n",
            "[5,  1310] train_loss: 0.250 train_accuracy: 0.848 test_accuracy: 0.813\n",
            "[5,  1315] train_loss: 0.379 train_accuracy: 0.843 test_accuracy: 0.807\n",
            "[5,  1320] train_loss: 0.173 train_accuracy: 0.851 test_accuracy: 0.817\n",
            "[5,  1325] train_loss: 0.793 train_accuracy: 0.865 test_accuracy: 0.835\n",
            "[5,  1330] train_loss: 0.880 train_accuracy: 0.870 test_accuracy: 0.836\n",
            "[5,  1335] train_loss: 0.594 train_accuracy: 0.837 test_accuracy: 0.801\n",
            "[5,  1340] train_loss: 1.094 train_accuracy: 0.822 test_accuracy: 0.784\n",
            "[5,  1345] train_loss: 0.368 train_accuracy: 0.848 test_accuracy: 0.816\n",
            "[5,  1350] train_loss: 0.344 train_accuracy: 0.837 test_accuracy: 0.797\n",
            "[5,  1355] train_loss: 0.636 train_accuracy: 0.864 test_accuracy: 0.831\n",
            "[5,  1360] train_loss: 0.231 train_accuracy: 0.857 test_accuracy: 0.830\n",
            "[5,  1365] train_loss: 0.330 train_accuracy: 0.858 test_accuracy: 0.837\n",
            "[5,  1370] train_loss: 0.428 train_accuracy: 0.871 test_accuracy: 0.847\n",
            "[5,  1375] train_loss: 0.827 train_accuracy: 0.885 test_accuracy: 0.855\n",
            "[5,  1380] train_loss: 0.346 train_accuracy: 0.868 test_accuracy: 0.841\n",
            "[5,  1385] train_loss: 0.669 train_accuracy: 0.861 test_accuracy: 0.831\n",
            "[5,  1390] train_loss: 0.220 train_accuracy: 0.857 test_accuracy: 0.825\n",
            "[5,  1395] train_loss: 0.226 train_accuracy: 0.858 test_accuracy: 0.830\n",
            "[5,  1400] train_loss: 0.302 train_accuracy: 0.870 test_accuracy: 0.844\n",
            "[5,  1405] train_loss: 0.396 train_accuracy: 0.876 test_accuracy: 0.852\n",
            "[5,  1410] train_loss: 0.337 train_accuracy: 0.878 test_accuracy: 0.855\n",
            "[5,  1415] train_loss: 0.354 train_accuracy: 0.866 test_accuracy: 0.844\n",
            "[5,  1420] train_loss: 0.830 train_accuracy: 0.843 test_accuracy: 0.822\n",
            "[5,  1425] train_loss: 0.809 train_accuracy: 0.785 test_accuracy: 0.776\n",
            "[5,  1430] train_loss: 0.995 train_accuracy: 0.789 test_accuracy: 0.778\n",
            "[5,  1435] train_loss: 0.602 train_accuracy: 0.812 test_accuracy: 0.797\n",
            "[5,  1440] train_loss: 0.639 train_accuracy: 0.826 test_accuracy: 0.808\n",
            "[5,  1445] train_loss: 0.354 train_accuracy: 0.836 test_accuracy: 0.811\n",
            "[5,  1450] train_loss: 0.443 train_accuracy: 0.866 test_accuracy: 0.842\n",
            "[5,  1455] train_loss: 0.379 train_accuracy: 0.860 test_accuracy: 0.830\n",
            "[5,  1460] train_loss: 0.275 train_accuracy: 0.843 test_accuracy: 0.812\n",
            "[5,  1465] train_loss: 0.488 train_accuracy: 0.850 test_accuracy: 0.824\n",
            "[5,  1470] train_loss: 0.696 train_accuracy: 0.850 test_accuracy: 0.829\n",
            "[5,  1475] train_loss: 0.235 train_accuracy: 0.810 test_accuracy: 0.802\n",
            "[5,  1480] train_loss: 0.192 train_accuracy: 0.807 test_accuracy: 0.802\n",
            "[5,  1485] train_loss: 0.671 train_accuracy: 0.859 test_accuracy: 0.850\n",
            "[5,  1490] train_loss: 0.463 train_accuracy: 0.863 test_accuracy: 0.832\n",
            "[5,  1495] train_loss: 1.027 train_accuracy: 0.834 test_accuracy: 0.810\n",
            "[5,  1500] train_loss: 0.681 train_accuracy: 0.804 test_accuracy: 0.785\n",
            "[5,  1505] train_loss: 0.377 train_accuracy: 0.758 test_accuracy: 0.749\n",
            "[5,  1510] train_loss: 1.127 train_accuracy: 0.820 test_accuracy: 0.813\n",
            "[5,  1515] train_loss: 0.957 train_accuracy: 0.849 test_accuracy: 0.844\n",
            "[5,  1520] train_loss: 0.519 train_accuracy: 0.832 test_accuracy: 0.807\n",
            "[5,  1525] train_loss: 0.389 train_accuracy: 0.856 test_accuracy: 0.834\n",
            "[5,  1530] train_loss: 0.529 train_accuracy: 0.866 test_accuracy: 0.848\n",
            "[5,  1535] train_loss: 0.443 train_accuracy: 0.872 test_accuracy: 0.847\n",
            "[5,  1540] train_loss: 0.713 train_accuracy: 0.881 test_accuracy: 0.854\n",
            "[5,  1545] train_loss: 0.377 train_accuracy: 0.847 test_accuracy: 0.815\n",
            "[5,  1550] train_loss: 0.305 train_accuracy: 0.801 test_accuracy: 0.761\n",
            "[5,  1555] train_loss: 0.562 train_accuracy: 0.846 test_accuracy: 0.801\n",
            "[5,  1560] train_loss: 0.627 train_accuracy: 0.852 test_accuracy: 0.812\n",
            "[5,  1565] train_loss: 0.340 train_accuracy: 0.830 test_accuracy: 0.799\n",
            "[5,  1570] train_loss: 0.517 train_accuracy: 0.832 test_accuracy: 0.804\n",
            "[5,  1575] train_loss: 0.416 train_accuracy: 0.866 test_accuracy: 0.835\n",
            "[5,  1580] train_loss: 0.116 train_accuracy: 0.854 test_accuracy: 0.832\n",
            "[5,  1585] train_loss: 0.342 train_accuracy: 0.835 test_accuracy: 0.818\n",
            "[5,  1590] train_loss: 0.414 train_accuracy: 0.829 test_accuracy: 0.819\n",
            "[5,  1595] train_loss: 0.835 train_accuracy: 0.848 test_accuracy: 0.831\n",
            "[5,  1600] train_loss: 0.553 train_accuracy: 0.862 test_accuracy: 0.835\n",
            "[5,  1605] train_loss: 0.422 train_accuracy: 0.870 test_accuracy: 0.839\n",
            "[5,  1610] train_loss: 0.782 train_accuracy: 0.862 test_accuracy: 0.832\n",
            "[5,  1615] train_loss: 0.353 train_accuracy: 0.822 test_accuracy: 0.805\n",
            "[5,  1620] train_loss: 0.298 train_accuracy: 0.832 test_accuracy: 0.817\n",
            "[5,  1625] train_loss: 0.355 train_accuracy: 0.857 test_accuracy: 0.827\n",
            "[5,  1630] train_loss: 0.169 train_accuracy: 0.855 test_accuracy: 0.821\n",
            "[5,  1635] train_loss: 0.499 train_accuracy: 0.880 test_accuracy: 0.845\n",
            "[5,  1640] train_loss: 0.158 train_accuracy: 0.872 test_accuracy: 0.851\n",
            "[5,  1645] train_loss: 0.493 train_accuracy: 0.851 test_accuracy: 0.833\n",
            "[5,  1650] train_loss: 0.677 train_accuracy: 0.855 test_accuracy: 0.830\n",
            "[5,  1655] train_loss: 0.603 train_accuracy: 0.866 test_accuracy: 0.840\n",
            "[5,  1660] train_loss: 0.520 train_accuracy: 0.872 test_accuracy: 0.841\n",
            "[5,  1665] train_loss: 0.583 train_accuracy: 0.853 test_accuracy: 0.816\n",
            "[5,  1670] train_loss: 0.757 train_accuracy: 0.824 test_accuracy: 0.777\n",
            "[5,  1675] train_loss: 0.619 train_accuracy: 0.821 test_accuracy: 0.775\n",
            "[5,  1680] train_loss: 1.041 train_accuracy: 0.787 test_accuracy: 0.737\n",
            "[5,  1685] train_loss: 0.665 train_accuracy: 0.780 test_accuracy: 0.737\n",
            "[5,  1690] train_loss: 0.702 train_accuracy: 0.827 test_accuracy: 0.788\n",
            "[5,  1695] train_loss: 0.220 train_accuracy: 0.831 test_accuracy: 0.794\n",
            "[5,  1700] train_loss: 0.644 train_accuracy: 0.834 test_accuracy: 0.808\n",
            "[5,  1705] train_loss: 0.429 train_accuracy: 0.827 test_accuracy: 0.806\n",
            "[5,  1710] train_loss: 0.279 train_accuracy: 0.823 test_accuracy: 0.798\n",
            "[5,  1715] train_loss: 0.748 train_accuracy: 0.805 test_accuracy: 0.773\n",
            "[5,  1720] train_loss: 0.696 train_accuracy: 0.792 test_accuracy: 0.762\n",
            "[5,  1725] train_loss: 0.398 train_accuracy: 0.782 test_accuracy: 0.756\n"
          ]
        }
      ],
      "source": [
        "import numpy as np\n",
        "import torch\n",
        "import torchvision\n",
        "import torch.nn as nn\n",
        "from matplotlib import pyplot as plt\n",
        "import random\n",
        "import torch.optim as optim\n",
        "import torchvision.transforms as transforms\n",
        "import time\n",
        "import torch.nn.functional as F\n",
        "from collections import Counter, defaultdict\n",
        "from itertools import combinations\n",
        "\n",
        "\n",
        "a_train = []  # Training-subset accuracy, appended on every logging step\n",
        "a_test = []  # Test-subset accuracy, appended on every logging step\n",
        "lossaaa = []  # Mean training loss over each 5-step logging window\n",
        "Inf = []  # Per-step Information values\n",
        "Var_all = []  # Per-step Variation values\n",
        "Generalization_Ratio_ = []  # Per-step generalization ratios\n",
        "dicide_action = []  # Declared for decision actions; never written to in this cell\n",
        "loss_before = torch.tensor(30.0)  # Loss of the previous step; 30.0 is a starting placeholder\n",
        "los = torch.tensor(30.0)  # Loss change between steps (loss - loss_before); starts positive\n",
        "dis_before_A1 = torch.tensor(0)  # Running score for action A1, compared with A2's each step\n",
        "dis_before_A2 = torch.tensor(0)  # Running score for action A2\n",
        "dis = torch.tensor(1)  # Unit step added to / subtracted from the two scores\n",
        "per = \"N\"  # Action favoured on the previous step; \"N\" matches neither \"A1\" nor \"A2\"\n",
        "state_before = torch.tensor(0.0)  # Generalization ratio of the previous step\n",
        "categrary_number = 10  # Number of categories\n",
        "tra_val_number = 2  # Divisor used for weight_val_probility (1.0 / tra_val_number)\n",
        "\n",
        "for epoch in range(5):  # Loop over the dataset multiple times\n",
        "    # Second data stream; it is advanced by hand with next() inside the step loop.\n",
        "    e3412_iter = iter(e3412_loader)\n",
        "    running_loss = 0.0  # Loss accumulated since the last log line\n",
        "    running_loss_all = 0.0  # Reset every epoch; not updated anywhere else in this cell\n",
        "\n",
        "    # Cut the carried-over state loose from any autograd graph left by the previous\n",
        "    # epoch. Tensor.detach() replaces the deprecated torch.autograd.Variable wrapper,\n",
        "    # which this cell never imports; like Variable(t, requires_grad=False) it returns\n",
        "    # a gradient-free tensor that shares storage with t.\n",
        "    loss_before = loss_before.detach()\n",
        "    los = los.detach()\n",
        "    state_before = state_before.detach()\n",
        "    dis_before_A1 = dis_before_A1.detach()\n",
        "    dis_before_A2 = dis_before_A2.detach()\n",
        "    acc_A1 = dis_before_A1.detach()  # Not read again in this cell\n",
        "    acc_A2 = dis_before_A2.detach()  # Not read again in this cell\n",
        "    dis_ = dis.detach()  # Not read again in this cell\n",
        "\n",
        "    for step, (imgs, labels) in enumerate(e1234_loader):  # Iterate over the data\n",
        "        ### calculate losses\n",
        "        weight_val_probility = 1.0 / tra_val_number  # Not read again in this cell\n",
        "        labels = labels.cuda()\n",
        "        imgs = imgs.cuda()\n",
        "        out_e1234 = net(imgs)\n",
        "        loss_out_e1234 = loss_function(out_e1234, labels)  # Candidate loss for action A1\n",
        "\n",
        "        # e3412_loader is advanced by hand, so it can run dry before e1234_loader does.\n",
        "        # Restart it instead of letting StopIteration abort the epoch.\n",
        "        try:\n",
        "            e3412_imgs, e3412_labels = next(e3412_iter)\n",
        "        except StopIteration:\n",
        "            e3412_iter = iter(e3412_loader)\n",
        "            e3412_imgs, e3412_labels = next(e3412_iter)\n",
        "        e3412_imgs = e3412_imgs.cuda()\n",
        "        e3412_labels = e3412_labels.cuda()\n",
        "        out_e3412 = net(e3412_imgs)\n",
        "        # Candidate loss for action A2 (index 1 of the loss pair built for the GDP).\n",
        "        loss_out_e3412 = loss_function(out_e3412, e3412_labels)\n",
        "\n",
        "        #################### extract\n",
        "        # Hard class predictions on four pre-loaded batches. The outputs are used only\n",
        "        # for counting, so each forward pass runs under torch.no_grad() instead of\n",
        "        # building an autograd graph and detaching the result afterwards.\n",
        "        ## e1 ext count\n",
        "        e12_extracted_loader_image = e12_extracted_loader_image.cuda()\n",
        "        e12_extracted_loader_label = e12_extracted_loader_label.cuda()\n",
        "        with torch.no_grad():\n",
        "            e12_extracted_out = net(e12_extracted_loader_image)\n",
        "        e12_extracted = torch.max(e12_extracted_out, dim=1)[1]  # Predicted class per sample\n",
        "\n",
        "        ## e2 ext count\n",
        "        e34_extracted_loader_image = e34_extracted_loader_image.cuda()\n",
        "        e34_extracted_loader_label = e34_extracted_loader_label.cuda()\n",
        "        with torch.no_grad():\n",
        "            e34_extracted_out = net(e34_extracted_loader_image)\n",
        "        e34_extracted = torch.max(e34_extracted_out, dim=1)[1]  # Predicted class per sample\n",
        "\n",
        "        # Histograms are kept as CPU tensors with the dtype torch.zeros(10) would have.\n",
        "        count_dtype = torch.get_default_dtype()\n",
        "\n",
        "        ########### e1 count\n",
        "        e12_s_loader_image = e12_s_loader_image.cuda()\n",
        "        e12_s_loader_label = e12_s_loader_label.cuda()\n",
        "        with torch.no_grad():\n",
        "            e12_inform_out = net(e12_s_loader_image)\n",
        "        e12_inform = torch.max(e12_inform_out, dim=1)[1]  # Predicted class per sample\n",
        "        # One 10-bin histogram of predicted classes per chunk. A single slice-and-cast\n",
        "        # replaces ten single-element copies per chunk.\n",
        "        split_e12_inform = [\n",
        "            torch.bincount(chunk, minlength=10)[:10].to(device=\"cpu\", dtype=count_dtype)\n",
        "            for chunk in torch.chunk(e12_inform, 10)\n",
        "        ]\n",
        "\n",
        "        ########### e2 count\n",
        "        e34_s_loader_image = e34_s_loader_image.cuda()\n",
        "        e34_s_loader_label = e34_s_loader_label.cuda()\n",
        "        with torch.no_grad():\n",
        "            e34_inform_out = net(e34_s_loader_image)\n",
        "        e34_inform = torch.max(e34_inform_out, dim=1)[1]  # Predicted class per sample\n",
        "        split_e34_inform = [\n",
        "            torch.bincount(chunk, minlength=10)[:10].to(device=\"cpu\", dtype=count_dtype)\n",
        "            for chunk in torch.chunk(e34_inform, 10)\n",
        "        ]\n",
        "\n",
        "        ##########\n",
        "        ## IN_IN_pro\n",
        "        # Predicted-class frequencies over each whole batch. Counts are moved to the CPU\n",
        "        # with one slice-and-cast instead of ten single-element copies; the result has\n",
        "        # the dtype torch.zeros(10) would have.\n",
        "        count_dtype = torch.get_default_dtype()\n",
        "\n",
        "        counts_i_12 = torch.bincount(e12_inform, minlength=10)\n",
        "        counts_i_34 = torch.bincount(e34_inform, minlength=10)\n",
        "        counts_in_12 = counts_i_12[:10].to(device=\"cpu\", dtype=count_dtype)\n",
        "        counts_in_34 = counts_i_34[:10].to(device=\"cpu\", dtype=count_dtype)\n",
        "        total_samples_in_12 = len(e12_inform)  # Number of predictions in the e12 batch\n",
        "        total_samples_in_34 = len(e34_inform)  # Number of predictions in the e34 batch\n",
        "\n",
        "        # The denominator is inflated by a factor of (1 + 1e-6), so the frequencies sum\n",
        "        # to slightly less than 1.\n",
        "        min_denominator_in_12 = total_samples_in_12 * 1e-6\n",
        "        probabilities_in_12 = counts_in_12 / (total_samples_in_12 + min_denominator_in_12)\n",
        "        probabilities_tensor_in_12 = probabilities_in_12.unsqueeze(0)  # Shape (1, 10)\n",
        "\n",
        "        min_denominator_in_34 = total_samples_in_34 * 1e-6\n",
        "        probabilities_in_34 = counts_in_34 / (total_samples_in_34 + min_denominator_in_34)\n",
        "        probabilities_tensor_in_34 = probabilities_in_34.unsqueeze(0)  # Shape (1, 10)\n",
        "\n",
        "        # Same statistics for the two extracted batches.\n",
        "        counts_ext_12 = torch.bincount(e12_extracted, minlength=10)\n",
        "        counts_ext_34 = torch.bincount(e34_extracted, minlength=10)\n",
        "        counts_extra_12 = counts_ext_12[:10].to(device=\"cpu\", dtype=count_dtype)\n",
        "        counts_extra_34 = counts_ext_34[:10].to(device=\"cpu\", dtype=count_dtype)\n",
        "        total_extracted_in_12 = len(e12_extracted)\n",
        "        total_extracted_in_34 = len(e34_extracted)\n",
        "\n",
        "        min_denominator_ext_12 = total_extracted_in_12 * 1e-6\n",
        "        probabilities_ext_12 = counts_extra_12 / (total_extracted_in_12 + min_denominator_ext_12)\n",
        "        probabilities_tensor_extra_12 = probabilities_ext_12.unsqueeze(0)  # Shape (1, 10)\n",
        "\n",
        "        min_denominator_ext_34 = total_extracted_in_34 * 1e-6\n",
        "        probabilities_ext_34 = counts_extra_34 / (total_extracted_in_34 + min_denominator_ext_34)\n",
        "        probabilities_tensor_extra_34 = probabilities_ext_34.unsqueeze(0)  # Shape (1, 10)\n",
        "\n",
        "        ############################### Variation x ###################################\n",
        "        in_12_all, in_34_all = probabilities_tensor_in_12, probabilities_tensor_in_34\n",
        "        in_12_extra, in_34_extra = probabilities_tensor_extra_12, probabilities_tensor_extra_34\n",
        "        weight_tra_probility_all = 1.0 / 2\n",
        "\n",
        "        # Per-class denominator shared by the four frequency vectors; 1e-30 keeps it non-zero.\n",
        "        dow_all = (\n",
        "            in_12_all * weight_tra_probility_all\n",
        "            + in_34_all * weight_tra_probility_all\n",
        "            + in_12_extra * weight_tra_probility_all\n",
        "            + in_34_extra * weight_tra_probility_all\n",
        "            + 1e-30\n",
        "        )\n",
        "\n",
        "        # Each vector's share of the denominator; [0] drops the leading axis added by unsqueeze(0).\n",
        "        in_1_all = ((in_12_all * weight_tra_probility_all) / dow_all)[0]\n",
        "        in_2_all = ((in_34_all * weight_tra_probility_all) / dow_all)[0]  # Not read again in this cell\n",
        "        e_1_all = ((in_12_extra * weight_tra_probility_all) / dow_all)[0]\n",
        "        e_2_all = ((in_34_extra * weight_tra_probility_all) / dow_all)[0]\n",
        "\n",
        "        # Element-wise p * log(p / q) terms of in_1_all against each extracted share.\n",
        "        k_divergence_all = (in_1_all + 1e-30) * torch.log(in_1_all / (e_1_all + 1e-30) + 1e-30)\n",
        "        k_divergence_all_ = (in_1_all + 1e-30) * torch.log(in_1_all / (e_2_all + 1e-30) + 1e-30)\n",
        "\n",
        "        # Variation is the largest absolute term across both comparisons.\n",
        "        d_KL_all = torch.max(torch.abs(k_divergence_all))\n",
        "        d_KL_all_ = torch.max(torch.abs(k_divergence_all_))\n",
        "        Variation_all = torch.max(d_KL_all, d_KL_all_)\n",
        "        Var_all.append(Variation_all)\n",
        "\n",
        "        ############################ Information ###################################\n",
        "        all_combinations = list(combinations(range(10), 2))\n",
        "        K = categrary_number * (categrary_number - 1)\n",
        "        n_pairs = len(all_combinations)\n",
        "        # Slots [0, n_pairs) hold the (i, j) direction of each pair and slots\n",
        "        # [n_pairs, 2 * n_pairs) the (j, i) direction.\n",
        "        result_tensor = torch.zeros(n_pairs * 2)\n",
        "        # NOTE(review): every chunk c writes to the same slots, so once the loop ends only\n",
        "        # the last chunk's values are left in result_tensor -- confirm this is intended.\n",
        "        for c in range(10):\n",
        "            # Histogram counts of chunk c divided by 10, computed once per chunk.\n",
        "            freq_12 = split_e12_inform[c] / 10\n",
        "            freq_34 = split_e34_inform[c] / 10\n",
        "            for idx, (i, j) in enumerate(all_combinations):\n",
        "                p12_i, p12_j = freq_12[i], freq_12[j]\n",
        "                p34_i, p34_j = freq_34[i], freq_34[j]\n",
        "\n",
        "                # Direction (i, j): keep the smaller of the two |p * log(p / q)| terms.\n",
        "                s1 = abs((p12_i + 1e-30) * torch.log(p12_i / (p12_j + 1e-30) + 1e-30))\n",
        "                s3 = abs((p34_i + 1e-30) * torch.log(p34_i / (p34_j + 1e-30) + 1e-30))\n",
        "                min_value = torch.min(s1, s3)\n",
        "                result_tensor[idx] = min_value.item()\n",
        "\n",
        "                # Direction (j, i), stored in the second half of result_tensor.\n",
        "                idx_ = idx + n_pairs\n",
        "                s1_ = abs((p12_j + 1e-30) * torch.log(p12_j / (p12_i + 1e-30) + 1e-30))\n",
        "                s3_ = abs((p34_j + 1e-30) * torch.log(p34_j / (p34_i + 1e-30) + 1e-30))\n",
        "                min_value_ = torch.min(s1_, s3_)\n",
        "                result_tensor[idx_] = min_value_.item()\n",
        "        Information = torch.sum(result_tensor) / K\n",
        "        Inf.append(Information)\n",
        "\n",
        "        ############################ Generalization_Ratio ###################################\n",
        "        # NOTE(review): Information == 0 makes this division produce inf or nan.\n",
        "        Generalization_Ratio = Variation_all * (Information + 1.0) / Information\n",
        "        Generalization_Ratio_.append(Generalization_Ratio)\n",
        "\n",
        "        ############################ Generalization Decision Process (GDP) ###################################\n",
        "        state_now = Generalization_Ratio\n",
        "        loss_before = loss_before.cuda()\n",
        "        state_before = state_before.cuda()\n",
        "        state_dis = state_now - state_before  # Change of the ratio since the previous step\n",
        "        # Candidate losses: index 0 belongs to action A1 (e1234), index 1 to action A2 (e3412).\n",
        "        result_tensor = torch.cat((loss_out_e1234.unsqueeze(0), loss_out_e3412.unsqueeze(0)), 0)\n",
        "\n",
        "        # Score the action that was favoured on the previous step. Its score goes up (and\n",
        "        # the other action's goes down) when the ratio did not fall while the loss rose,\n",
        "        # or when the ratio fell while the loss did not rise; otherwise the update is\n",
        "        # reversed. The step is doubled whenever the loss did not rise.\n",
        "        ratio_not_falling = bool(state_dis >= 0.0)\n",
        "        loss_rising = bool(los > 0.0)  # True: not fitting; False: overfitting\n",
        "        step_size = dis if loss_rising else dis * 2\n",
        "        signed_step = step_size if ratio_not_falling == loss_rising else -step_size\n",
        "\n",
        "        if per == \"A1\":\n",
        "            dis_before_A1 += signed_step\n",
        "            dis_before_A2 -= signed_step\n",
        "        elif per == \"A2\":\n",
        "            dis_before_A1 -= signed_step\n",
        "            dis_before_A2 += signed_step\n",
        "\n",
        "        # With no previous A1/A2 choice (per is still \"N\") A1 is favoured without\n",
        "        # comparing the scores; otherwise A1 wins ties.\n",
        "        favour_A1 = per not in (\"A1\", \"A2\") or bool(dis_before_A1 >= dis_before_A2)\n",
        "        if favour_A1:\n",
        "            choice_weights, keep_A1, keep_A2, per = [3, 1], 3 / 4, 1 / 4, \"A1\"\n",
        "        else:\n",
        "            choice_weights, keep_A1, keep_A2, per = [1, 3], 1 / 4, 3 / 4, \"A2\"\n",
        "\n",
        "        # Weighted draw between the two candidate losses (3:1 towards the favoured one).\n",
        "        # per records which action was favoured, not which loss the draw returned.\n",
        "        loss = random.choices(result_tensor, weights=choice_weights)[0]\n",
        "        # Decay both scores; the favoured action keeps the larger share.\n",
        "        dis_before_A1 = dis_before_A1 * keep_A1\n",
        "        dis_before_A2 = dis_before_A2 * keep_A2\n",
        "\n",
        "        ####################################\n",
        "        #### optimizer\n",
        "        # Change of the GDP-selected loss relative to the previous step. It is only\n",
        "        # compared against 0 on the next step, so it is detached to avoid keeping both\n",
        "        # losses' autograd graphs referenced across steps.\n",
        "        los = (loss - loss_before).detach()\n",
        "        ####################################\n",
        "        # NOTE: this overrides the GDP selection, so the network is always trained on\n",
        "        # the e1234 loss.\n",
        "        loss = loss_out_e1234  # Losses from not using the selection\n",
        "        ####################################\n",
        "        state_before = state_now\n",
        "        optimizer_L.zero_grad()\n",
        "        loss.backward()\n",
        "        optimizer_L.step()\n",
        "        # Keep only the value for the next step, not a reference to the finished graph.\n",
        "        loss_before = loss.detach()\n",
        "        running_loss += loss.item()\n",
        "\n",
        "        # Print statistics\n",
        "        if step % 5 == 4:  # Print every 500 mini-batches\n",
        "            with torch.no_grad():  # 'with' is a context manager\n",
        "                s_test_image = s_test_image.cuda()\n",
        "                s_test_label = s_test_label.cuda()\n",
        "                s_tra_image = s_tra_image.cuda()\n",
        "                s_tra_label = s_tra_label.cuda()\n",
        "                outputs = net(s_test_image)  # [batch, 10]\n",
        "                predict_y = torch.max(outputs, dim=1)[1]\n",
        "                accuracy = torch.eq(predict_y, s_test_label).sum().item() / s_test_label.size(0)\n",
        "                a_test.append(float(accuracy))\n",
        "                outputs_t = net(s_tra_image)  # [batch, 10]\n",
        "                predict_y_t = torch.max(outputs_t, dim=1)[1]\n",
        "                accuracy_t = torch.eq(predict_y_t, s_tra_label).sum().item() / s_tra_label.size(0)\n",
        "                a_train.append(float(accuracy_t))\n",
        "                lossaaa.append(float(running_loss / 5))\n",
        "                print('[%d, %5d] train_loss: %.3f train_accuracy: %.3f test_accuracy: %.3f' %\n",
        "                      (epoch + 1, step + 1, running_loss / 5, accuracy_t, accuracy))\n",
        "                running_loss = 0.0\n",
        "                running_loss = 0.0"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "21Nf-vKHpDit"
      },
      "outputs": [],
      "source": [
        "# Training-accuracy history -> a single comma-separated line\n",
        "with open('Accuracy_a_train.txt', 'w') as file:\n",
        "    file.write(', '.join(str(value) for value in a_train))\n",
        "\n",
        "# Test-accuracy history, same layout\n",
        "with open('Accuracy_a_test.txt', 'w') as file:\n",
        "    file.write(', '.join(str(value) for value in a_test))\n",
        "\n",
        "# Averaged training-loss history, same layout\n",
        "with open('Accuracy_loss.txt', 'w') as file:\n",
        "    file.write(', '.join(str(value) for value in lossaaa))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ELDD-CQupQ8z"
      },
      "outputs": [],
      "source": [
        "# Unwrap every recorded entry into a plain Python number via .item()\n",
        "Inf_list = [entry.item() for entry in Inf]\n",
        "Var_av_list = [entry.item() for entry in Var_all]\n",
        "Generalization_Ratio_list = [entry.item() for entry in Generalization_Ratio_]\n",
        "\n",
        "# Each series below is stored as one comma-separated line of text\n",
        "\n",
        "# Values collected in Inf\n",
        "with open('Inf_OOD.txt', 'w') as file:\n",
        "    file.write(', '.join(map(str, Inf_list)))\n",
        "\n",
        "# Values collected in Var_all\n",
        "with open('Var_all_OOD.txt', 'w') as file:\n",
        "    file.write(', '.join(map(str, Var_av_list)))\n",
        "\n",
        "# Values collected in Generalization_Ratio_\n",
        "with open('Generalization_Ratio_list.txt', 'w') as file:\n",
        "    file.write(', '.join(map(str, Generalization_Ratio_list)))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "UTNaZkRQpU6e"
      },
      "outputs": [],
      "source": [
        "import time\n",
        "import os  # Import the os module\n",
        "\n",
        "# Folder that collects the saved models; created on first use, reused afterwards\n",
        "model_path = './Models/'\n",
        "os.makedirs(model_path, exist_ok=True)\n",
        "\n",
        "# Local-time stamp 'YYYYMMDDHHMM' (strftime falls back to the current local time when no time is given);\n",
        "# two saves made within the same minute end up with the same file name\n",
        "rq = time.strftime('%Y%m%d%H%M')\n",
        "\n",
        "# Store the whole network object (not only its weights) under the timestamped name\n",
        "current_model_path = ''.join((model_path, rq, \"_model.pkl\"))\n",
        "torch.save(net, current_model_path)\n",
        "print(\"Saved model file: \" + current_model_path)  # report where the file went"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3bughS24pYYi",
        "outputId": "e0719e4a-c7d9-4c29-ae52-a9dd8cc0896f"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "train: 0.9033389926428975\n",
            "test: 0.8799546998867497\n",
            "loss: 0.0902311198413372\n"
          ]
        }
      ],
      "source": [
        "# Best value each logged curve reached at any logging step\n",
        "# (the three extremes need not come from the same step)\n",
        "for label, best_of, curve in (\n",
        "    (\"train:\", max, a_train),  # highest logged training accuracy\n",
        "    (\"test:\", max, a_test),    # highest logged test accuracy\n",
        "    (\"loss:\", min, lossaaa),   # lowest logged averaged training loss\n",
        "):\n",
        "    print(label, best_of(curve))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "vt4BE5xOpglu",
        "outputId": "8a8315b4-ff7b-453f-8ac6-6feb5eff5a8c"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "train: 0.7633069082672707\n",
            "test: 0.7345\n"
          ]
        }
      ],
      "source": [
        "# Re-evaluate the trained network on one large batch from each split of colorized MNIST\n",
        "trainset = datasets.ImageFolder(root='/content/colorized-MNIST/training', transform=transform)\n",
        "testset = datasets.ImageFolder(root='/content/colorized-MNIST/testing', transform=transform)\n",
        "# The train loader shuffles, so the train accuracy below is measured on a random sample of up to\n",
        "# 8830 images and can vary between runs; the test loader keeps the dataset order\n",
        "trainloader = torch.utils.data.DataLoader(trainset, batch_size=8830, shuffle=True, num_workers=0)\n",
        "t_loader = torch.utils.data.DataLoader(testset, batch_size=10000, shuffle=False, num_workers=0)\n",
        "\n",
        "# Pure inference: no_grad() keeps autograd from recording a graph for these very large batches,\n",
        "# which saves memory and leaves the predictions themselves unchanged\n",
        "with torch.no_grad():\n",
        "    # ---- training split: first batch only ----\n",
        "    trainloader_iter = iter(trainloader)\n",
        "    tl_image, tl_label = next(trainloader_iter)\n",
        "    # Move the batch to the GPU\n",
        "    tl_image = tl_image.cuda()\n",
        "    tl_label = tl_label.cuda()\n",
        "    tl_imageoutputs = net(tl_image)  # [batch, 10]\n",
        "    # Predicted class = index of the largest output score per image\n",
        "    predict_y = torch.max(tl_imageoutputs, dim=1)[1]\n",
        "    # Fraction of images in the batch whose prediction equals the label\n",
        "    accuracy = torch.eq(predict_y, tl_label).sum().item() / tl_label.size(0)\n",
        "    print(\"train:\", float(accuracy))\n",
        "\n",
        "    # ---- test split: first batch only ----\n",
        "    t_data_iter = iter(t_loader)\n",
        "    t_image, t_label = next(t_data_iter)\n",
        "    # Move the batch to the GPU\n",
        "    t_image = t_image.cuda()\n",
        "    t_label = t_label.cuda()\n",
        "    t_imageoutputs = net(t_image)  # [batch, 10]\n",
        "    # Predicted class = index of the largest output score per image\n",
        "    predict = torch.max(t_imageoutputs, dim=1)[1]\n",
        "    # Fraction of images in the batch whose prediction equals the label\n",
        "    accuracy_t = torch.eq(predict, t_label).sum().item() / t_label.size(0)\n",
        "    print(\"test:\", float(accuracy_t))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "HriILnIxSwkr"
      },
      "source": [
        "### **Network2 : use linear layers and activation layers + Generalization Decision Process (GDP)**"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "BPjSXQMcxW1-"
      },
      "outputs": [],
      "source": [
        "import argparse\n",
        "import os\n",
        "import numpy as np\n",
        "import torchvision.transforms as transforms\n",
        "from torchvision.utils import save_image\n",
        "from torch.utils.data import DataLoader\n",
        "from torchvision import datasets\n",
        "from torch.autograd import Variable\n",
        "import torch.nn as nn\n",
        "import torch\n",
        "\n",
        "class LANet(nn.Module):\n",
        "    def __init__(self):  # Initialization function\n",
        "        super(LANet, self).__init__()\n",
        "\n",
        "        self.fc1 = nn.Linear(3*28*28, 1000)  # Define the first fully connected layer\n",
        "        self.fc2 = nn.Linear(1000, 500)  # Define the second fully connected layer\n",
        "        self.fc3 = nn.Linear(500, 100)  # Define the third fully connected layer\n",
        "        self.fc4 = nn.Linear(100, 50)  # Define the fourth fully connected layer\n",
        "        self.fc5 = nn.Linear(50, 25)  # Define the fifth fully connected layer\n",
        "        self.fc6 = nn.Linear(25, 20)  # Define the sixth fully connected layer\n",
        "        self.fc7 = nn.Linear(20, 10)  # Define the seventh fully connected layer\n",
        "\n",
        "    def forward(self, x):  # Define the forward pass\n",
        "\n",
        "        x = x.view(-1, 3*28*28)  # Flatten the input tensor\n",
        "        x = F.relu(self.fc1(x))  # Apply ReLU activation after the first layer\n",
        "        x = self.fc2(x)  # Apply the second layer\n",
        "        x = F.relu(self.fc3(x))  # Apply ReLU activation after the third layer\n",
        "        x = self.fc4(x)  # Apply the fourth layer\n",
        "        x = F.relu(self.fc5(x))  # Apply ReLU activation after the fifth layer\n",
        "        x = F.relu(self.fc6(x))  # Apply ReLU activation after the sixth layer\n",
        "        x = self.fc7(x)  # Apply the seventh layer\n",
        "        return x  # Return the output\n",
        "\n",
        "net = LANet()  # fresh network instance\n",
        "loss_function = nn.CrossEntropyLoss()  # cross-entropy criterion\n",
        "\n",
        "# Put both the model and the criterion on the GPU whenever CUDA is present\n",
        "if torch.cuda.is_available():\n",
        "    net, loss_function = net.cuda(), loss_function.cuda()\n",
        "\n",
        "# Adam over all network parameters with learning rate 0.001\n",
        "optimizer_L = torch.optim.Adam(net.parameters(), lr=1e-3)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "GgLqQB-sS3W6",
        "outputId": "87e26819-86e6-462a-fa1c-485f8497e66a"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "[1,     5] train_loss: 2.280 train_accuracy: 0.103 test_accuracy: 0.114\n",
            "[1,    10] train_loss: 2.474 train_accuracy: 0.106 test_accuracy: 0.111\n",
            "[1,    15] train_loss: 2.316 train_accuracy: 0.101 test_accuracy: 0.118\n",
            "[1,    20] train_loss: 2.394 train_accuracy: 0.108 test_accuracy: 0.113\n",
            "[1,    25] train_loss: 2.404 train_accuracy: 0.125 test_accuracy: 0.123\n",
            "[1,    30] train_loss: 2.382 train_accuracy: 0.098 test_accuracy: 0.061\n",
            "[1,    35] train_loss: 2.302 train_accuracy: 0.100 test_accuracy: 0.070\n",
            "[1,    40] train_loss: 2.324 train_accuracy: 0.095 test_accuracy: 0.069\n",
            "[1,    45] train_loss: 2.336 train_accuracy: 0.102 test_accuracy: 0.112\n",
            "[1,    50] train_loss: 2.289 train_accuracy: 0.102 test_accuracy: 0.077\n",
            "[1,    55] train_loss: 2.249 train_accuracy: 0.111 test_accuracy: 0.091\n",
            "[1,    60] train_loss: 2.318 train_accuracy: 0.102 test_accuracy: 0.047\n",
            "[1,    65] train_loss: 2.338 train_accuracy: 0.102 test_accuracy: 0.075\n",
            "[1,    70] train_loss: 2.330 train_accuracy: 0.118 test_accuracy: 0.132\n",
            "[1,    75] train_loss: 2.353 train_accuracy: 0.118 test_accuracy: 0.134\n",
            "[1,    80] train_loss: 2.303 train_accuracy: 0.144 test_accuracy: 0.150\n",
            "[1,    85] train_loss: 2.271 train_accuracy: 0.093 test_accuracy: 0.054\n",
            "[1,    90] train_loss: 2.298 train_accuracy: 0.130 test_accuracy: 0.072\n",
            "[1,    95] train_loss: 2.245 train_accuracy: 0.137 test_accuracy: 0.081\n",
            "[1,   100] train_loss: 2.291 train_accuracy: 0.198 test_accuracy: 0.226\n",
            "[1,   105] train_loss: 2.311 train_accuracy: 0.175 test_accuracy: 0.199\n",
            "[1,   110] train_loss: 2.210 train_accuracy: 0.263 test_accuracy: 0.299\n",
            "[1,   115] train_loss: 1.991 train_accuracy: 0.239 test_accuracy: 0.271\n",
            "[1,   120] train_loss: 2.132 train_accuracy: 0.186 test_accuracy: 0.205\n",
            "[1,   125] train_loss: 2.149 train_accuracy: 0.179 test_accuracy: 0.205\n",
            "[1,   130] train_loss: 2.393 train_accuracy: 0.171 test_accuracy: 0.189\n",
            "[1,   135] train_loss: 2.276 train_accuracy: 0.102 test_accuracy: 0.114\n",
            "[1,   140] train_loss: 2.364 train_accuracy: 0.104 test_accuracy: 0.110\n",
            "[1,   145] train_loss: 2.261 train_accuracy: 0.115 test_accuracy: 0.133\n",
            "[1,   150] train_loss: 2.436 train_accuracy: 0.115 test_accuracy: 0.057\n",
            "[1,   155] train_loss: 2.292 train_accuracy: 0.122 test_accuracy: 0.135\n",
            "[1,   160] train_loss: 2.320 train_accuracy: 0.109 test_accuracy: 0.106\n",
            "[1,   165] train_loss: 2.424 train_accuracy: 0.098 test_accuracy: 0.064\n",
            "[1,   170] train_loss: 2.249 train_accuracy: 0.103 test_accuracy: 0.105\n",
            "[1,   175] train_loss: 2.181 train_accuracy: 0.128 test_accuracy: 0.147\n",
            "[1,   180] train_loss: 2.311 train_accuracy: 0.131 test_accuracy: 0.149\n",
            "[1,   185] train_loss: 2.341 train_accuracy: 0.135 test_accuracy: 0.150\n",
            "[1,   190] train_loss: 2.089 train_accuracy: 0.170 test_accuracy: 0.184\n",
            "[1,   195] train_loss: 2.158 train_accuracy: 0.123 test_accuracy: 0.138\n",
            "[1,   200] train_loss: 2.110 train_accuracy: 0.229 test_accuracy: 0.259\n",
            "[1,   205] train_loss: 1.909 train_accuracy: 0.239 test_accuracy: 0.270\n",
            "[1,   210] train_loss: 2.155 train_accuracy: 0.223 test_accuracy: 0.248\n",
            "[1,   215] train_loss: 1.930 train_accuracy: 0.228 test_accuracy: 0.261\n",
            "[1,   220] train_loss: 2.278 train_accuracy: 0.234 test_accuracy: 0.266\n",
            "[1,   225] train_loss: 2.138 train_accuracy: 0.179 test_accuracy: 0.199\n",
            "[1,   230] train_loss: 2.062 train_accuracy: 0.183 test_accuracy: 0.199\n",
            "[1,   235] train_loss: 1.960 train_accuracy: 0.254 test_accuracy: 0.228\n",
            "[1,   240] train_loss: 2.061 train_accuracy: 0.278 test_accuracy: 0.264\n",
            "[1,   245] train_loss: 2.279 train_accuracy: 0.266 test_accuracy: 0.255\n",
            "[1,   250] train_loss: 2.099 train_accuracy: 0.249 test_accuracy: 0.218\n",
            "[1,   255] train_loss: 2.119 train_accuracy: 0.242 test_accuracy: 0.265\n",
            "[1,   260] train_loss: 2.018 train_accuracy: 0.248 test_accuracy: 0.275\n",
            "[1,   265] train_loss: 1.904 train_accuracy: 0.236 test_accuracy: 0.271\n",
            "[1,   270] train_loss: 2.117 train_accuracy: 0.220 test_accuracy: 0.253\n",
            "[1,   275] train_loss: 1.878 train_accuracy: 0.232 test_accuracy: 0.259\n",
            "[1,   280] train_loss: 1.784 train_accuracy: 0.202 test_accuracy: 0.230\n",
            "[1,   285] train_loss: 2.022 train_accuracy: 0.257 test_accuracy: 0.279\n",
            "[1,   290] train_loss: 1.800 train_accuracy: 0.288 test_accuracy: 0.327\n",
            "[1,   295] train_loss: 2.012 train_accuracy: 0.292 test_accuracy: 0.327\n",
            "[1,   300] train_loss: 2.178 train_accuracy: 0.305 test_accuracy: 0.345\n",
            "[1,   305] train_loss: 1.783 train_accuracy: 0.293 test_accuracy: 0.322\n",
            "[1,   310] train_loss: 1.899 train_accuracy: 0.260 test_accuracy: 0.298\n",
            "[1,   315] train_loss: 1.885 train_accuracy: 0.300 test_accuracy: 0.338\n",
            "[1,   320] train_loss: 1.860 train_accuracy: 0.300 test_accuracy: 0.327\n",
            "[1,   325] train_loss: 1.876 train_accuracy: 0.284 test_accuracy: 0.240\n",
            "[1,   330] train_loss: 2.049 train_accuracy: 0.273 test_accuracy: 0.222\n",
            "[1,   335] train_loss: 1.693 train_accuracy: 0.302 test_accuracy: 0.283\n",
            "[1,   340] train_loss: 1.977 train_accuracy: 0.244 test_accuracy: 0.279\n",
            "[1,   345] train_loss: 2.418 train_accuracy: 0.297 test_accuracy: 0.333\n",
            "[1,   350] train_loss: 2.170 train_accuracy: 0.259 test_accuracy: 0.283\n",
            "[1,   355] train_loss: 2.217 train_accuracy: 0.238 test_accuracy: 0.236\n",
            "[1,   360] train_loss: 1.991 train_accuracy: 0.253 test_accuracy: 0.228\n",
            "[1,   365] train_loss: 2.002 train_accuracy: 0.283 test_accuracy: 0.288\n",
            "[1,   370] train_loss: 2.207 train_accuracy: 0.303 test_accuracy: 0.293\n",
            "[1,   375] train_loss: 1.919 train_accuracy: 0.324 test_accuracy: 0.296\n",
            "[1,   380] train_loss: 1.764 train_accuracy: 0.302 test_accuracy: 0.284\n",
            "[1,   385] train_loss: 1.529 train_accuracy: 0.316 test_accuracy: 0.342\n",
            "[1,   390] train_loss: 1.761 train_accuracy: 0.371 test_accuracy: 0.321\n",
            "[1,   395] train_loss: 1.572 train_accuracy: 0.357 test_accuracy: 0.312\n",
            "[1,   400] train_loss: 1.309 train_accuracy: 0.358 test_accuracy: 0.313\n",
            "[1,   405] train_loss: 1.904 train_accuracy: 0.375 test_accuracy: 0.374\n",
            "[1,   410] train_loss: 1.313 train_accuracy: 0.363 test_accuracy: 0.344\n",
            "[1,   415] train_loss: 1.836 train_accuracy: 0.363 test_accuracy: 0.340\n",
            "[1,   420] train_loss: 1.686 train_accuracy: 0.334 test_accuracy: 0.275\n",
            "[1,   425] train_loss: 1.567 train_accuracy: 0.408 test_accuracy: 0.376\n",
            "[1,   430] train_loss: 1.424 train_accuracy: 0.401 test_accuracy: 0.367\n",
            "[1,   435] train_loss: 1.777 train_accuracy: 0.417 test_accuracy: 0.408\n",
            "[1,   440] train_loss: 1.945 train_accuracy: 0.394 test_accuracy: 0.446\n",
            "[1,   445] train_loss: 1.468 train_accuracy: 0.391 test_accuracy: 0.437\n",
            "[1,   450] train_loss: 1.566 train_accuracy: 0.393 test_accuracy: 0.378\n",
            "[1,   455] train_loss: 1.550 train_accuracy: 0.408 test_accuracy: 0.385\n",
            "[1,   460] train_loss: 1.427 train_accuracy: 0.406 test_accuracy: 0.383\n",
            "[1,   465] train_loss: 1.452 train_accuracy: 0.434 test_accuracy: 0.415\n",
            "[1,   470] train_loss: 1.364 train_accuracy: 0.342 test_accuracy: 0.354\n",
            "[1,   475] train_loss: 1.931 train_accuracy: 0.303 test_accuracy: 0.252\n",
            "[1,   480] train_loss: 2.416 train_accuracy: 0.335 test_accuracy: 0.336\n",
            "[1,   485] train_loss: 1.727 train_accuracy: 0.285 test_accuracy: 0.279\n",
            "[1,   490] train_loss: 1.731 train_accuracy: 0.381 test_accuracy: 0.351\n",
            "[1,   495] train_loss: 1.323 train_accuracy: 0.390 test_accuracy: 0.377\n",
            "[1,   500] train_loss: 1.592 train_accuracy: 0.422 test_accuracy: 0.393\n",
            "[1,   505] train_loss: 1.418 train_accuracy: 0.378 test_accuracy: 0.346\n",
            "[1,   510] train_loss: 1.670 train_accuracy: 0.393 test_accuracy: 0.360\n",
            "[1,   515] train_loss: 2.000 train_accuracy: 0.406 test_accuracy: 0.387\n",
            "[1,   520] train_loss: 1.764 train_accuracy: 0.333 test_accuracy: 0.310\n",
            "[1,   525] train_loss: 1.860 train_accuracy: 0.371 test_accuracy: 0.388\n",
            "[1,   530] train_loss: 1.573 train_accuracy: 0.317 test_accuracy: 0.355\n",
            "[1,   535] train_loss: 2.699 train_accuracy: 0.395 test_accuracy: 0.449\n",
            "[1,   540] train_loss: 1.382 train_accuracy: 0.374 test_accuracy: 0.357\n",
            "[1,   545] train_loss: 1.531 train_accuracy: 0.407 test_accuracy: 0.431\n",
            "[1,   550] train_loss: 2.086 train_accuracy: 0.474 test_accuracy: 0.519\n",
            "[1,   555] train_loss: 1.500 train_accuracy: 0.466 test_accuracy: 0.512\n",
            "[1,   560] train_loss: 1.435 train_accuracy: 0.492 test_accuracy: 0.539\n",
            "[1,   565] train_loss: 1.675 train_accuracy: 0.478 test_accuracy: 0.544\n",
            "[1,   570] train_loss: 1.310 train_accuracy: 0.447 test_accuracy: 0.507\n",
            "[1,   575] train_loss: 1.961 train_accuracy: 0.462 test_accuracy: 0.522\n",
            "[1,   580] train_loss: 1.071 train_accuracy: 0.454 test_accuracy: 0.444\n",
            "[1,   585] train_loss: 1.561 train_accuracy: 0.488 test_accuracy: 0.483\n",
            "[1,   590] train_loss: 1.137 train_accuracy: 0.492 test_accuracy: 0.516\n",
            "[1,   595] train_loss: 1.724 train_accuracy: 0.482 test_accuracy: 0.498\n",
            "[1,   600] train_loss: 1.708 train_accuracy: 0.483 test_accuracy: 0.448\n",
            "[1,   605] train_loss: 1.663 train_accuracy: 0.445 test_accuracy: 0.413\n",
            "[1,   610] train_loss: 1.279 train_accuracy: 0.484 test_accuracy: 0.451\n",
            "[1,   615] train_loss: 1.464 train_accuracy: 0.521 test_accuracy: 0.516\n",
            "[1,   620] train_loss: 1.380 train_accuracy: 0.532 test_accuracy: 0.584\n",
            "[1,   625] train_loss: 1.649 train_accuracy: 0.557 test_accuracy: 0.615\n",
            "[1,   630] train_loss: 1.544 train_accuracy: 0.539 test_accuracy: 0.594\n",
            "[1,   635] train_loss: 1.414 train_accuracy: 0.506 test_accuracy: 0.564\n",
            "[1,   640] train_loss: 1.072 train_accuracy: 0.566 test_accuracy: 0.617\n",
            "[1,   645] train_loss: 0.924 train_accuracy: 0.569 test_accuracy: 0.585\n",
            "[1,   650] train_loss: 1.273 train_accuracy: 0.548 test_accuracy: 0.564\n",
            "[1,   655] train_loss: 1.176 train_accuracy: 0.476 test_accuracy: 0.469\n",
            "[1,   660] train_loss: 1.244 train_accuracy: 0.500 test_accuracy: 0.483\n",
            "[1,   665] train_loss: 1.082 train_accuracy: 0.531 test_accuracy: 0.511\n",
            "[1,   670] train_loss: 1.387 train_accuracy: 0.470 test_accuracy: 0.467\n",
            "[1,   675] train_loss: 1.377 train_accuracy: 0.474 test_accuracy: 0.454\n",
            "[1,   680] train_loss: 1.024 train_accuracy: 0.563 test_accuracy: 0.601\n",
            "[1,   685] train_loss: 1.775 train_accuracy: 0.510 test_accuracy: 0.538\n",
            "[1,   690] train_loss: 1.456 train_accuracy: 0.526 test_accuracy: 0.546\n",
            "[1,   695] train_loss: 1.112 train_accuracy: 0.576 test_accuracy: 0.639\n",
            "[1,   700] train_loss: 1.045 train_accuracy: 0.562 test_accuracy: 0.611\n",
            "[1,   705] train_loss: 1.578 train_accuracy: 0.557 test_accuracy: 0.589\n",
            "[1,   710] train_loss: 1.048 train_accuracy: 0.577 test_accuracy: 0.642\n",
            "[1,   715] train_loss: 1.132 train_accuracy: 0.543 test_accuracy: 0.600\n",
            "[1,   720] train_loss: 0.961 train_accuracy: 0.578 test_accuracy: 0.630\n",
            "[1,   725] train_loss: 1.048 train_accuracy: 0.551 test_accuracy: 0.598\n",
            "[1,   730] train_loss: 0.998 train_accuracy: 0.550 test_accuracy: 0.591\n",
            "[1,   735] train_loss: 1.293 train_accuracy: 0.553 test_accuracy: 0.590\n",
            "[1,   740] train_loss: 0.952 train_accuracy: 0.549 test_accuracy: 0.588\n",
            "[1,   745] train_loss: 1.385 train_accuracy: 0.574 test_accuracy: 0.615\n",
            "[1,   750] train_loss: 1.420 train_accuracy: 0.525 test_accuracy: 0.587\n",
            "[1,   755] train_loss: 1.080 train_accuracy: 0.504 test_accuracy: 0.567\n",
            "[1,   760] train_loss: 1.149 train_accuracy: 0.493 test_accuracy: 0.531\n",
            "[1,   765] train_loss: 1.544 train_accuracy: 0.483 test_accuracy: 0.498\n",
            "[1,   770] train_loss: 1.721 train_accuracy: 0.509 test_accuracy: 0.500\n",
            "[1,   775] train_loss: 1.323 train_accuracy: 0.544 test_accuracy: 0.546\n",
            "[1,   780] train_loss: 1.305 train_accuracy: 0.563 test_accuracy: 0.575\n",
            "[1,   785] train_loss: 1.102 train_accuracy: 0.522 test_accuracy: 0.550\n",
            "[1,   790] train_loss: 0.953 train_accuracy: 0.513 test_accuracy: 0.553\n",
            "[1,   795] train_loss: 1.279 train_accuracy: 0.554 test_accuracy: 0.602\n",
            "[1,   800] train_loss: 1.397 train_accuracy: 0.553 test_accuracy: 0.599\n",
            "[1,   805] train_loss: 0.843 train_accuracy: 0.570 test_accuracy: 0.607\n",
            "[1,   810] train_loss: 1.169 train_accuracy: 0.559 test_accuracy: 0.593\n",
            "[1,   815] train_loss: 1.068 train_accuracy: 0.560 test_accuracy: 0.626\n",
            "[1,   820] train_loss: 0.804 train_accuracy: 0.572 test_accuracy: 0.633\n",
            "[1,   825] train_loss: 0.794 train_accuracy: 0.566 test_accuracy: 0.620\n",
            "[1,   830] train_loss: 1.330 train_accuracy: 0.560 test_accuracy: 0.620\n",
            "[1,   835] train_loss: 1.068 train_accuracy: 0.582 test_accuracy: 0.627\n",
            "[1,   840] train_loss: 1.045 train_accuracy: 0.618 test_accuracy: 0.650\n",
            "[1,   845] train_loss: 1.291 train_accuracy: 0.581 test_accuracy: 0.592\n",
            "[1,   850] train_loss: 0.935 train_accuracy: 0.541 test_accuracy: 0.554\n",
            "[1,   855] train_loss: 1.033 train_accuracy: 0.569 test_accuracy: 0.579\n",
            "[1,   860] train_loss: 1.367 train_accuracy: 0.600 test_accuracy: 0.619\n",
            "[1,   865] train_loss: 1.072 train_accuracy: 0.607 test_accuracy: 0.641\n",
            "[1,   870] train_loss: 1.052 train_accuracy: 0.600 test_accuracy: 0.619\n",
            "[1,   875] train_loss: 1.552 train_accuracy: 0.575 test_accuracy: 0.564\n",
            "[1,   880] train_loss: 0.800 train_accuracy: 0.574 test_accuracy: 0.554\n",
            "[1,   885] train_loss: 0.955 train_accuracy: 0.606 test_accuracy: 0.607\n",
            "[1,   890] train_loss: 1.120 train_accuracy: 0.580 test_accuracy: 0.623\n",
            "[1,   895] train_loss: 1.134 train_accuracy: 0.590 test_accuracy: 0.646\n",
            "[1,   900] train_loss: 0.726 train_accuracy: 0.581 test_accuracy: 0.642\n",
            "[1,   905] train_loss: 1.159 train_accuracy: 0.597 test_accuracy: 0.646\n",
            "[1,   910] train_loss: 1.385 train_accuracy: 0.583 test_accuracy: 0.620\n",
            "[1,   915] train_loss: 1.364 train_accuracy: 0.639 test_accuracy: 0.665\n",
            "[1,   920] train_loss: 1.102 train_accuracy: 0.596 test_accuracy: 0.645\n",
            "[1,   925] train_loss: 1.603 train_accuracy: 0.594 test_accuracy: 0.640\n",
            "[1,   930] train_loss: 1.255 train_accuracy: 0.570 test_accuracy: 0.635\n",
            "[1,   935] train_loss: 1.528 train_accuracy: 0.538 test_accuracy: 0.616\n",
            "[1,   940] train_loss: 1.166 train_accuracy: 0.496 test_accuracy: 0.565\n",
            "[1,   945] train_loss: 1.325 train_accuracy: 0.536 test_accuracy: 0.600\n",
            "[1,   950] train_loss: 1.386 train_accuracy: 0.578 test_accuracy: 0.645\n",
            "[1,   955] train_loss: 0.905 train_accuracy: 0.611 test_accuracy: 0.654\n",
            "[1,   960] train_loss: 1.053 train_accuracy: 0.623 test_accuracy: 0.627\n",
            "[1,   965] train_loss: 0.909 train_accuracy: 0.592 test_accuracy: 0.595\n",
            "[1,   970] train_loss: 1.188 train_accuracy: 0.581 test_accuracy: 0.594\n",
            "[1,   975] train_loss: 1.246 train_accuracy: 0.594 test_accuracy: 0.631\n",
            "[1,   980] train_loss: 0.952 train_accuracy: 0.589 test_accuracy: 0.658\n",
            "[1,   985] train_loss: 1.223 train_accuracy: 0.645 test_accuracy: 0.705\n",
            "[1,   990] train_loss: 0.846 train_accuracy: 0.617 test_accuracy: 0.675\n",
            "[1,   995] train_loss: 0.901 train_accuracy: 0.596 test_accuracy: 0.657\n",
            "[1,  1000] train_loss: 0.739 train_accuracy: 0.603 test_accuracy: 0.652\n",
            "[1,  1005] train_loss: 0.998 train_accuracy: 0.618 test_accuracy: 0.684\n",
            "[1,  1010] train_loss: 1.274 train_accuracy: 0.557 test_accuracy: 0.609\n",
            "[1,  1015] train_loss: 1.927 train_accuracy: 0.566 test_accuracy: 0.625\n",
            "[1,  1020] train_loss: 1.336 train_accuracy: 0.560 test_accuracy: 0.607\n",
            "[1,  1025] train_loss: 0.993 train_accuracy: 0.481 test_accuracy: 0.514\n",
            "[1,  1030] train_loss: 1.019 train_accuracy: 0.541 test_accuracy: 0.573\n",
            "[1,  1035] train_loss: 1.154 train_accuracy: 0.536 test_accuracy: 0.573\n",
            "[1,  1040] train_loss: 0.901 train_accuracy: 0.585 test_accuracy: 0.655\n",
            "[1,  1045] train_loss: 0.994 train_accuracy: 0.612 test_accuracy: 0.655\n",
            "[1,  1050] train_loss: 0.942 train_accuracy: 0.526 test_accuracy: 0.557\n",
            "[1,  1055] train_loss: 1.756 train_accuracy: 0.565 test_accuracy: 0.609\n",
            "[1,  1060] train_loss: 1.227 train_accuracy: 0.598 test_accuracy: 0.652\n",
            "[1,  1065] train_loss: 1.391 train_accuracy: 0.618 test_accuracy: 0.595\n",
            "[1,  1070] train_loss: 0.898 train_accuracy: 0.629 test_accuracy: 0.599\n",
            "[1,  1075] train_loss: 1.006 train_accuracy: 0.618 test_accuracy: 0.628\n",
            "[1,  1080] train_loss: 0.897 train_accuracy: 0.600 test_accuracy: 0.616\n",
            "[1,  1085] train_loss: 0.955 train_accuracy: 0.591 test_accuracy: 0.617\n",
            "[1,  1090] train_loss: 1.213 train_accuracy: 0.639 test_accuracy: 0.618\n",
            "[1,  1095] train_loss: 0.591 train_accuracy: 0.620 test_accuracy: 0.596\n",
            "[1,  1100] train_loss: 1.033 train_accuracy: 0.625 test_accuracy: 0.600\n",
            "[1,  1105] train_loss: 0.940 train_accuracy: 0.647 test_accuracy: 0.639\n",
            "[1,  1110] train_loss: 0.876 train_accuracy: 0.659 test_accuracy: 0.652\n",
            "[1,  1115] train_loss: 0.647 train_accuracy: 0.682 test_accuracy: 0.670\n",
            "[1,  1120] train_loss: 1.199 train_accuracy: 0.645 test_accuracy: 0.650\n",
            "[1,  1125] train_loss: 0.916 train_accuracy: 0.640 test_accuracy: 0.627\n",
            "[1,  1130] train_loss: 0.847 train_accuracy: 0.633 test_accuracy: 0.613\n",
            "[1,  1135] train_loss: 1.526 train_accuracy: 0.640 test_accuracy: 0.619\n",
            "[1,  1140] train_loss: 1.054 train_accuracy: 0.683 test_accuracy: 0.686\n",
            "[1,  1145] train_loss: 0.848 train_accuracy: 0.668 test_accuracy: 0.729\n",
            "[1,  1150] train_loss: 0.967 train_accuracy: 0.663 test_accuracy: 0.739\n",
            "[1,  1155] train_loss: 0.629 train_accuracy: 0.669 test_accuracy: 0.744\n",
            "[1,  1160] train_loss: 1.201 train_accuracy: 0.638 test_accuracy: 0.701\n",
            "[1,  1165] train_loss: 0.904 train_accuracy: 0.621 test_accuracy: 0.683\n",
            "[1,  1170] train_loss: 0.654 train_accuracy: 0.671 test_accuracy: 0.724\n",
            "[1,  1175] train_loss: 1.204 train_accuracy: 0.645 test_accuracy: 0.719\n",
            "[1,  1180] train_loss: 1.276 train_accuracy: 0.661 test_accuracy: 0.718\n",
            "[1,  1185] train_loss: 0.733 train_accuracy: 0.719 test_accuracy: 0.733\n",
            "[1,  1190] train_loss: 0.707 train_accuracy: 0.671 test_accuracy: 0.693\n",
            "[1,  1195] train_loss: 0.944 train_accuracy: 0.653 test_accuracy: 0.676\n",
            "[1,  1200] train_loss: 1.038 train_accuracy: 0.668 test_accuracy: 0.666\n",
            "[1,  1205] train_loss: 0.797 train_accuracy: 0.646 test_accuracy: 0.664\n",
            "[1,  1210] train_loss: 1.065 train_accuracy: 0.631 test_accuracy: 0.665\n",
            "[1,  1215] train_loss: 0.830 train_accuracy: 0.652 test_accuracy: 0.695\n",
            "[1,  1220] train_loss: 0.582 train_accuracy: 0.676 test_accuracy: 0.720\n",
            "[1,  1225] train_loss: 0.795 train_accuracy: 0.662 test_accuracy: 0.722\n",
            "[1,  1230] train_loss: 0.754 train_accuracy: 0.634 test_accuracy: 0.700\n",
            "[1,  1235] train_loss: 1.039 train_accuracy: 0.600 test_accuracy: 0.670\n",
            "[1,  1240] train_loss: 1.148 train_accuracy: 0.658 test_accuracy: 0.719\n",
            "[1,  1245] train_loss: 0.777 train_accuracy: 0.634 test_accuracy: 0.681\n",
            "[1,  1250] train_loss: 1.723 train_accuracy: 0.637 test_accuracy: 0.684\n",
            "[1,  1255] train_loss: 1.460 train_accuracy: 0.642 test_accuracy: 0.703\n",
            "[1,  1260] train_loss: 1.112 train_accuracy: 0.653 test_accuracy: 0.721\n",
            "[1,  1265] train_loss: 1.301 train_accuracy: 0.665 test_accuracy: 0.731\n",
            "[1,  1270] train_loss: 1.131 train_accuracy: 0.685 test_accuracy: 0.747\n",
            "[1,  1275] train_loss: 0.709 train_accuracy: 0.713 test_accuracy: 0.731\n",
            "[1,  1280] train_loss: 0.633 train_accuracy: 0.693 test_accuracy: 0.688\n",
            "[1,  1285] train_loss: 0.859 train_accuracy: 0.677 test_accuracy: 0.669\n",
            "[1,  1290] train_loss: 0.953 train_accuracy: 0.678 test_accuracy: 0.668\n",
            "[1,  1295] train_loss: 0.477 train_accuracy: 0.681 test_accuracy: 0.677\n",
            "[1,  1300] train_loss: 0.802 train_accuracy: 0.680 test_accuracy: 0.692\n",
            "[1,  1305] train_loss: 1.053 train_accuracy: 0.722 test_accuracy: 0.742\n",
            "[1,  1310] train_loss: 0.648 train_accuracy: 0.722 test_accuracy: 0.730\n",
            "[1,  1315] train_loss: 0.640 train_accuracy: 0.681 test_accuracy: 0.660\n",
            "[1,  1320] train_loss: 0.888 train_accuracy: 0.641 test_accuracy: 0.619\n",
            "[1,  1325] train_loss: 1.112 train_accuracy: 0.677 test_accuracy: 0.696\n",
            "[1,  1330] train_loss: 0.764 train_accuracy: 0.692 test_accuracy: 0.727\n",
            "[1,  1335] train_loss: 1.116 train_accuracy: 0.662 test_accuracy: 0.709\n",
            "[1,  1340] train_loss: 1.036 train_accuracy: 0.715 test_accuracy: 0.745\n",
            "[1,  1345] train_loss: 0.745 train_accuracy: 0.677 test_accuracy: 0.686\n",
            "[1,  1350] train_loss: 0.762 train_accuracy: 0.626 test_accuracy: 0.625\n",
            "[1,  1355] train_loss: 1.483 train_accuracy: 0.687 test_accuracy: 0.699\n",
            "[1,  1360] train_loss: 1.101 train_accuracy: 0.670 test_accuracy: 0.716\n",
            "[1,  1365] train_loss: 0.944 train_accuracy: 0.622 test_accuracy: 0.683\n",
            "[1,  1370] train_loss: 0.936 train_accuracy: 0.632 test_accuracy: 0.696\n",
            "[1,  1375] train_loss: 1.293 train_accuracy: 0.658 test_accuracy: 0.722\n",
            "[1,  1380] train_loss: 0.641 train_accuracy: 0.663 test_accuracy: 0.720\n",
            "[1,  1385] train_loss: 0.528 train_accuracy: 0.670 test_accuracy: 0.722\n",
            "[1,  1390] train_loss: 1.059 train_accuracy: 0.690 test_accuracy: 0.715\n",
            "[1,  1395] train_loss: 1.223 train_accuracy: 0.694 test_accuracy: 0.668\n",
            "[1,  1400] train_loss: 1.130 train_accuracy: 0.673 test_accuracy: 0.649\n",
            "[1,  1405] train_loss: 0.987 train_accuracy: 0.666 test_accuracy: 0.655\n",
            "[1,  1410] train_loss: 0.633 train_accuracy: 0.694 test_accuracy: 0.683\n",
            "[1,  1415] train_loss: 0.862 train_accuracy: 0.685 test_accuracy: 0.715\n",
            "[1,  1420] train_loss: 0.883 train_accuracy: 0.648 test_accuracy: 0.706\n",
            "[1,  1425] train_loss: 1.360 train_accuracy: 0.700 test_accuracy: 0.746\n",
            "[1,  1430] train_loss: 0.608 train_accuracy: 0.715 test_accuracy: 0.744\n",
            "[1,  1435] train_loss: 1.253 train_accuracy: 0.693 test_accuracy: 0.710\n",
            "[1,  1440] train_loss: 1.261 train_accuracy: 0.749 test_accuracy: 0.741\n",
            "[1,  1445] train_loss: 0.680 train_accuracy: 0.734 test_accuracy: 0.725\n",
            "[1,  1450] train_loss: 0.876 train_accuracy: 0.720 test_accuracy: 0.686\n",
            "[1,  1455] train_loss: 1.498 train_accuracy: 0.738 test_accuracy: 0.721\n",
            "[1,  1460] train_loss: 0.737 train_accuracy: 0.749 test_accuracy: 0.783\n",
            "[1,  1465] train_loss: 0.509 train_accuracy: 0.727 test_accuracy: 0.789\n",
            "[1,  1470] train_loss: 0.596 train_accuracy: 0.726 test_accuracy: 0.796\n",
            "[1,  1475] train_loss: 0.767 train_accuracy: 0.713 test_accuracy: 0.782\n",
            "[1,  1480] train_loss: 0.451 train_accuracy: 0.720 test_accuracy: 0.782\n",
            "[1,  1485] train_loss: 0.739 train_accuracy: 0.726 test_accuracy: 0.791\n",
            "[1,  1490] train_loss: 1.184 train_accuracy: 0.737 test_accuracy: 0.792\n",
            "[1,  1495] train_loss: 0.986 train_accuracy: 0.716 test_accuracy: 0.738\n",
            "[1,  1500] train_loss: 0.950 train_accuracy: 0.710 test_accuracy: 0.712\n",
            "[1,  1505] train_loss: 0.680 train_accuracy: 0.738 test_accuracy: 0.729\n",
            "[1,  1510] train_loss: 0.575 train_accuracy: 0.737 test_accuracy: 0.722\n",
            "[1,  1515] train_loss: 0.812 train_accuracy: 0.738 test_accuracy: 0.718\n",
            "[1,  1520] train_loss: 0.808 train_accuracy: 0.745 test_accuracy: 0.750\n",
            "[1,  1525] train_loss: 0.797 train_accuracy: 0.739 test_accuracy: 0.764\n",
            "[1,  1530] train_loss: 0.678 train_accuracy: 0.740 test_accuracy: 0.770\n",
            "[1,  1535] train_loss: 1.052 train_accuracy: 0.763 test_accuracy: 0.773\n",
            "[1,  1540] train_loss: 0.632 train_accuracy: 0.769 test_accuracy: 0.764\n",
            "[1,  1545] train_loss: 0.579 train_accuracy: 0.768 test_accuracy: 0.760\n",
            "[1,  1550] train_loss: 0.661 train_accuracy: 0.762 test_accuracy: 0.774\n",
            "[1,  1555] train_loss: 0.783 train_accuracy: 0.746 test_accuracy: 0.767\n",
            "[1,  1560] train_loss: 0.734 train_accuracy: 0.737 test_accuracy: 0.770\n",
            "[1,  1565] train_loss: 0.747 train_accuracy: 0.734 test_accuracy: 0.744\n",
            "[1,  1570] train_loss: 0.935 train_accuracy: 0.731 test_accuracy: 0.716\n",
            "[1,  1575] train_loss: 0.889 train_accuracy: 0.676 test_accuracy: 0.666\n",
            "[1,  1580] train_loss: 0.782 train_accuracy: 0.652 test_accuracy: 0.652\n",
            "[1,  1585] train_loss: 1.107 train_accuracy: 0.646 test_accuracy: 0.666\n",
            "[1,  1590] train_loss: 1.115 train_accuracy: 0.713 test_accuracy: 0.757\n",
            "[1,  1595] train_loss: 1.123 train_accuracy: 0.709 test_accuracy: 0.760\n",
            "[1,  1600] train_loss: 1.121 train_accuracy: 0.702 test_accuracy: 0.738\n",
            "[1,  1605] train_loss: 1.447 train_accuracy: 0.748 test_accuracy: 0.768\n",
            "[1,  1610] train_loss: 0.596 train_accuracy: 0.764 test_accuracy: 0.758\n",
            "[1,  1615] train_loss: 0.785 train_accuracy: 0.744 test_accuracy: 0.717\n",
            "[1,  1620] train_loss: 0.777 train_accuracy: 0.778 test_accuracy: 0.763\n",
            "[1,  1625] train_loss: 0.523 train_accuracy: 0.794 test_accuracy: 0.812\n",
            "[1,  1630] train_loss: 0.773 train_accuracy: 0.793 test_accuracy: 0.820\n",
            "[1,  1635] train_loss: 0.918 train_accuracy: 0.782 test_accuracy: 0.803\n",
            "[1,  1640] train_loss: 0.640 train_accuracy: 0.759 test_accuracy: 0.774\n",
            "[1,  1645] train_loss: 0.801 train_accuracy: 0.784 test_accuracy: 0.786\n",
            "[1,  1650] train_loss: 0.532 train_accuracy: 0.782 test_accuracy: 0.776\n",
            "[1,  1655] train_loss: 0.599 train_accuracy: 0.769 test_accuracy: 0.776\n",
            "[1,  1660] train_loss: 0.840 train_accuracy: 0.775 test_accuracy: 0.771\n",
            "[1,  1665] train_loss: 0.488 train_accuracy: 0.771 test_accuracy: 0.744\n",
            "[1,  1670] train_loss: 1.048 train_accuracy: 0.758 test_accuracy: 0.722\n",
            "[1,  1675] train_loss: 0.597 train_accuracy: 0.747 test_accuracy: 0.742\n",
            "[1,  1680] train_loss: 0.638 train_accuracy: 0.722 test_accuracy: 0.749\n",
            "[1,  1685] train_loss: 0.513 train_accuracy: 0.707 test_accuracy: 0.744\n",
            "[1,  1690] train_loss: 0.829 train_accuracy: 0.732 test_accuracy: 0.752\n",
            "[1,  1695] train_loss: 0.805 train_accuracy: 0.754 test_accuracy: 0.771\n",
            "[1,  1700] train_loss: 0.929 train_accuracy: 0.780 test_accuracy: 0.776\n",
            "[1,  1705] train_loss: 1.292 train_accuracy: 0.761 test_accuracy: 0.762\n",
            "[1,  1710] train_loss: 0.887 train_accuracy: 0.724 test_accuracy: 0.753\n",
            "[1,  1715] train_loss: 0.826 train_accuracy: 0.709 test_accuracy: 0.741\n",
            "[1,  1720] train_loss: 0.620 train_accuracy: 0.649 test_accuracy: 0.667\n",
            "[1,  1725] train_loss: 1.303 train_accuracy: 0.722 test_accuracy: 0.716\n",
            "[2,     5] train_loss: 1.040 train_accuracy: 0.718 test_accuracy: 0.710\n",
            "[2,    10] train_loss: 1.056 train_accuracy: 0.722 test_accuracy: 0.740\n",
            "[2,    15] train_loss: 0.662 train_accuracy: 0.734 test_accuracy: 0.761\n",
            "[2,    20] train_loss: 0.566 train_accuracy: 0.738 test_accuracy: 0.769\n",
            "[2,    25] train_loss: 1.124 train_accuracy: 0.740 test_accuracy: 0.755\n",
            "[2,    30] train_loss: 1.013 train_accuracy: 0.796 test_accuracy: 0.798\n",
            "[2,    35] train_loss: 0.804 train_accuracy: 0.792 test_accuracy: 0.782\n",
            "[2,    40] train_loss: 0.709 train_accuracy: 0.794 test_accuracy: 0.793\n",
            "[2,    45] train_loss: 0.936 train_accuracy: 0.775 test_accuracy: 0.794\n",
            "[2,    50] train_loss: 0.523 train_accuracy: 0.740 test_accuracy: 0.770\n",
            "[2,    55] train_loss: 0.726 train_accuracy: 0.744 test_accuracy: 0.780\n",
            "[2,    60] train_loss: 0.423 train_accuracy: 0.761 test_accuracy: 0.791\n",
            "[2,    65] train_loss: 0.811 train_accuracy: 0.763 test_accuracy: 0.802\n",
            "[2,    70] train_loss: 0.503 train_accuracy: 0.777 test_accuracy: 0.809\n",
            "[2,    75] train_loss: 0.461 train_accuracy: 0.765 test_accuracy: 0.786\n",
            "[2,    80] train_loss: 0.346 train_accuracy: 0.711 test_accuracy: 0.729\n",
            "[2,    85] train_loss: 0.600 train_accuracy: 0.699 test_accuracy: 0.715\n",
            "[2,    90] train_loss: 0.774 train_accuracy: 0.692 test_accuracy: 0.723\n",
            "[2,    95] train_loss: 1.318 train_accuracy: 0.717 test_accuracy: 0.752\n",
            "[2,   100] train_loss: 0.838 train_accuracy: 0.746 test_accuracy: 0.754\n",
            "[2,   105] train_loss: 0.536 train_accuracy: 0.724 test_accuracy: 0.736\n",
            "[2,   110] train_loss: 1.009 train_accuracy: 0.695 test_accuracy: 0.718\n",
            "[2,   115] train_loss: 0.785 train_accuracy: 0.718 test_accuracy: 0.768\n",
            "[2,   120] train_loss: 0.720 train_accuracy: 0.731 test_accuracy: 0.781\n",
            "[2,   125] train_loss: 0.686 train_accuracy: 0.744 test_accuracy: 0.798\n",
            "[2,   130] train_loss: 1.079 train_accuracy: 0.751 test_accuracy: 0.792\n",
            "[2,   135] train_loss: 0.908 train_accuracy: 0.745 test_accuracy: 0.750\n",
            "[2,   140] train_loss: 0.564 train_accuracy: 0.748 test_accuracy: 0.757\n",
            "[2,   145] train_loss: 0.795 train_accuracy: 0.731 test_accuracy: 0.755\n",
            "[2,   150] train_loss: 0.549 train_accuracy: 0.768 test_accuracy: 0.799\n",
            "[2,   155] train_loss: 0.707 train_accuracy: 0.762 test_accuracy: 0.806\n",
            "[2,   160] train_loss: 1.064 train_accuracy: 0.784 test_accuracy: 0.798\n",
            "[2,   165] train_loss: 0.402 train_accuracy: 0.787 test_accuracy: 0.789\n",
            "[2,   170] train_loss: 0.410 train_accuracy: 0.768 test_accuracy: 0.772\n",
            "[2,   175] train_loss: 0.684 train_accuracy: 0.755 test_accuracy: 0.746\n",
            "[2,   180] train_loss: 0.622 train_accuracy: 0.769 test_accuracy: 0.746\n",
            "[2,   185] train_loss: 0.371 train_accuracy: 0.738 test_accuracy: 0.713\n",
            "[2,   190] train_loss: 0.794 train_accuracy: 0.688 test_accuracy: 0.669\n",
            "[2,   195] train_loss: 1.018 train_accuracy: 0.736 test_accuracy: 0.718\n",
            "[2,   200] train_loss: 0.534 train_accuracy: 0.738 test_accuracy: 0.717\n",
            "[2,   205] train_loss: 2.001 train_accuracy: 0.766 test_accuracy: 0.767\n",
            "[2,   210] train_loss: 0.659 train_accuracy: 0.747 test_accuracy: 0.769\n",
            "[2,   215] train_loss: 1.495 train_accuracy: 0.777 test_accuracy: 0.764\n",
            "[2,   220] train_loss: 0.712 train_accuracy: 0.781 test_accuracy: 0.752\n",
            "[2,   225] train_loss: 0.662 train_accuracy: 0.782 test_accuracy: 0.769\n",
            "[2,   230] train_loss: 0.858 train_accuracy: 0.722 test_accuracy: 0.704\n",
            "[2,   235] train_loss: 0.318 train_accuracy: 0.697 test_accuracy: 0.677\n",
            "[2,   240] train_loss: 1.011 train_accuracy: 0.708 test_accuracy: 0.688\n",
            "[2,   245] train_loss: 0.921 train_accuracy: 0.725 test_accuracy: 0.701\n",
            "[2,   250] train_loss: 0.731 train_accuracy: 0.715 test_accuracy: 0.696\n",
            "[2,   255] train_loss: 0.635 train_accuracy: 0.739 test_accuracy: 0.735\n",
            "[2,   260] train_loss: 0.484 train_accuracy: 0.749 test_accuracy: 0.751\n",
            "[2,   265] train_loss: 0.996 train_accuracy: 0.746 test_accuracy: 0.756\n",
            "[2,   270] train_loss: 0.940 train_accuracy: 0.715 test_accuracy: 0.748\n",
            "[2,   275] train_loss: 0.780 train_accuracy: 0.752 test_accuracy: 0.790\n",
            "[2,   280] train_loss: 0.733 train_accuracy: 0.758 test_accuracy: 0.800\n",
            "[2,   285] train_loss: 0.629 train_accuracy: 0.768 test_accuracy: 0.801\n",
            "[2,   290] train_loss: 0.670 train_accuracy: 0.805 test_accuracy: 0.796\n",
            "[2,   295] train_loss: 0.850 train_accuracy: 0.767 test_accuracy: 0.726\n",
            "[2,   300] train_loss: 0.830 train_accuracy: 0.788 test_accuracy: 0.747\n",
            "[2,   305] train_loss: 0.551 train_accuracy: 0.783 test_accuracy: 0.776\n",
            "[2,   310] train_loss: 0.737 train_accuracy: 0.796 test_accuracy: 0.792\n",
            "[2,   315] train_loss: 0.684 train_accuracy: 0.807 test_accuracy: 0.793\n",
            "[2,   320] train_loss: 0.560 train_accuracy: 0.771 test_accuracy: 0.770\n",
            "[2,   325] train_loss: 1.129 train_accuracy: 0.745 test_accuracy: 0.747\n",
            "[2,   330] train_loss: 1.159 train_accuracy: 0.709 test_accuracy: 0.709\n",
            "[2,   335] train_loss: 0.726 train_accuracy: 0.707 test_accuracy: 0.696\n",
            "[2,   340] train_loss: 0.784 train_accuracy: 0.763 test_accuracy: 0.756\n",
            "[2,   345] train_loss: 0.785 train_accuracy: 0.775 test_accuracy: 0.776\n",
            "[2,   350] train_loss: 0.758 train_accuracy: 0.790 test_accuracy: 0.790\n",
            "[2,   355] train_loss: 0.446 train_accuracy: 0.804 test_accuracy: 0.805\n",
            "[2,   360] train_loss: 0.786 train_accuracy: 0.789 test_accuracy: 0.783\n",
            "[2,   365] train_loss: 0.345 train_accuracy: 0.763 test_accuracy: 0.751\n",
            "[2,   370] train_loss: 0.873 train_accuracy: 0.783 test_accuracy: 0.771\n",
            "[2,   375] train_loss: 0.830 train_accuracy: 0.786 test_accuracy: 0.776\n",
            "[2,   380] train_loss: 0.245 train_accuracy: 0.769 test_accuracy: 0.752\n",
            "[2,   385] train_loss: 0.713 train_accuracy: 0.793 test_accuracy: 0.780\n",
            "[2,   390] train_loss: 0.591 train_accuracy: 0.793 test_accuracy: 0.818\n",
            "[2,   395] train_loss: 0.300 train_accuracy: 0.731 test_accuracy: 0.794\n",
            "[2,   400] train_loss: 1.102 train_accuracy: 0.751 test_accuracy: 0.771\n",
            "[2,   405] train_loss: 1.246 train_accuracy: 0.768 test_accuracy: 0.759\n",
            "[2,   410] train_loss: 0.574 train_accuracy: 0.751 test_accuracy: 0.747\n",
            "[2,   415] train_loss: 0.562 train_accuracy: 0.784 test_accuracy: 0.780\n",
            "[2,   420] train_loss: 0.580 train_accuracy: 0.740 test_accuracy: 0.740\n",
            "[2,   425] train_loss: 0.900 train_accuracy: 0.755 test_accuracy: 0.775\n",
            "[2,   430] train_loss: 0.680 train_accuracy: 0.752 test_accuracy: 0.780\n",
            "[2,   435] train_loss: 0.564 train_accuracy: 0.767 test_accuracy: 0.798\n",
            "[2,   440] train_loss: 0.579 train_accuracy: 0.786 test_accuracy: 0.818\n",
            "[2,   445] train_loss: 0.884 train_accuracy: 0.770 test_accuracy: 0.799\n",
            "[2,   450] train_loss: 0.752 train_accuracy: 0.799 test_accuracy: 0.824\n",
            "[2,   455] train_loss: 0.365 train_accuracy: 0.791 test_accuracy: 0.812\n",
            "[2,   460] train_loss: 0.740 train_accuracy: 0.782 test_accuracy: 0.779\n",
            "[2,   465] train_loss: 0.661 train_accuracy: 0.768 test_accuracy: 0.740\n",
            "[2,   470] train_loss: 0.891 train_accuracy: 0.807 test_accuracy: 0.776\n",
            "[2,   475] train_loss: 0.539 train_accuracy: 0.812 test_accuracy: 0.790\n",
            "[2,   480] train_loss: 0.493 train_accuracy: 0.805 test_accuracy: 0.794\n",
            "[2,   485] train_loss: 0.618 train_accuracy: 0.781 test_accuracy: 0.783\n",
            "[2,   490] train_loss: 0.309 train_accuracy: 0.768 test_accuracy: 0.787\n",
            "[2,   495] train_loss: 0.619 train_accuracy: 0.798 test_accuracy: 0.808\n",
            "[2,   500] train_loss: 0.426 train_accuracy: 0.831 test_accuracy: 0.836\n",
            "[2,   505] train_loss: 0.459 train_accuracy: 0.820 test_accuracy: 0.818\n",
            "[2,   510] train_loss: 0.496 train_accuracy: 0.822 test_accuracy: 0.805\n",
            "[2,   515] train_loss: 0.295 train_accuracy: 0.822 test_accuracy: 0.796\n",
            "[2,   520] train_loss: 0.993 train_accuracy: 0.825 test_accuracy: 0.815\n",
            "[2,   525] train_loss: 0.364 train_accuracy: 0.825 test_accuracy: 0.816\n",
            "[2,   530] train_loss: 0.228 train_accuracy: 0.817 test_accuracy: 0.808\n",
            "[2,   535] train_loss: 0.884 train_accuracy: 0.811 test_accuracy: 0.794\n",
            "[2,   540] train_loss: 0.761 train_accuracy: 0.825 test_accuracy: 0.804\n",
            "[2,   545] train_loss: 0.410 train_accuracy: 0.790 test_accuracy: 0.765\n",
            "[2,   550] train_loss: 0.819 train_accuracy: 0.777 test_accuracy: 0.755\n",
            "[2,   555] train_loss: 1.519 train_accuracy: 0.801 test_accuracy: 0.780\n",
            "[2,   560] train_loss: 0.454 train_accuracy: 0.793 test_accuracy: 0.786\n",
            "[2,   565] train_loss: 0.792 train_accuracy: 0.772 test_accuracy: 0.774\n",
            "[2,   570] train_loss: 0.887 train_accuracy: 0.741 test_accuracy: 0.748\n",
            "[2,   575] train_loss: 0.702 train_accuracy: 0.733 test_accuracy: 0.736\n",
            "[2,   580] train_loss: 1.080 train_accuracy: 0.786 test_accuracy: 0.793\n",
            "[2,   585] train_loss: 1.038 train_accuracy: 0.737 test_accuracy: 0.745\n",
            "[2,   590] train_loss: 0.436 train_accuracy: 0.708 test_accuracy: 0.709\n",
            "[2,   595] train_loss: 1.061 train_accuracy: 0.704 test_accuracy: 0.693\n",
            "[2,   600] train_loss: 1.023 train_accuracy: 0.654 test_accuracy: 0.682\n",
            "[2,   605] train_loss: 1.382 train_accuracy: 0.765 test_accuracy: 0.782\n",
            "[2,   610] train_loss: 0.658 train_accuracy: 0.774 test_accuracy: 0.812\n",
            "[2,   615] train_loss: 0.607 train_accuracy: 0.764 test_accuracy: 0.818\n",
            "[2,   620] train_loss: 0.633 train_accuracy: 0.763 test_accuracy: 0.815\n",
            "[2,   625] train_loss: 0.491 train_accuracy: 0.800 test_accuracy: 0.813\n",
            "[2,   630] train_loss: 0.627 train_accuracy: 0.795 test_accuracy: 0.804\n",
            "[2,   635] train_loss: 0.546 train_accuracy: 0.782 test_accuracy: 0.797\n",
            "[2,   640] train_loss: 0.558 train_accuracy: 0.796 test_accuracy: 0.804\n",
            "[2,   645] train_loss: 0.726 train_accuracy: 0.808 test_accuracy: 0.803\n",
            "[2,   650] train_loss: 0.423 train_accuracy: 0.781 test_accuracy: 0.764\n",
            "[2,   655] train_loss: 0.494 train_accuracy: 0.778 test_accuracy: 0.748\n",
            "[2,   660] train_loss: 0.419 train_accuracy: 0.798 test_accuracy: 0.783\n",
            "[2,   665] train_loss: 0.938 train_accuracy: 0.771 test_accuracy: 0.766\n",
            "[2,   670] train_loss: 0.790 train_accuracy: 0.783 test_accuracy: 0.799\n",
            "[2,   675] train_loss: 0.365 train_accuracy: 0.775 test_accuracy: 0.793\n",
            "[2,   680] train_loss: 0.789 train_accuracy: 0.816 test_accuracy: 0.823\n",
            "[2,   685] train_loss: 0.975 train_accuracy: 0.807 test_accuracy: 0.834\n",
            "[2,   690] train_loss: 0.414 train_accuracy: 0.791 test_accuracy: 0.821\n",
            "[2,   695] train_loss: 0.670 train_accuracy: 0.814 test_accuracy: 0.823\n",
            "[2,   700] train_loss: 0.292 train_accuracy: 0.827 test_accuracy: 0.814\n",
            "[2,   705] train_loss: 0.647 train_accuracy: 0.823 test_accuracy: 0.816\n",
            "[2,   710] train_loss: 0.582 train_accuracy: 0.813 test_accuracy: 0.814\n",
            "[2,   715] train_loss: 0.919 train_accuracy: 0.777 test_accuracy: 0.788\n",
            "[2,   720] train_loss: 0.538 train_accuracy: 0.783 test_accuracy: 0.797\n",
            "[2,   725] train_loss: 1.095 train_accuracy: 0.791 test_accuracy: 0.794\n",
            "[2,   730] train_loss: 0.424 train_accuracy: 0.810 test_accuracy: 0.808\n",
            "[2,   735] train_loss: 0.790 train_accuracy: 0.832 test_accuracy: 0.813\n",
            "[2,   740] train_loss: 0.430 train_accuracy: 0.813 test_accuracy: 0.783\n",
            "[2,   745] train_loss: 0.621 train_accuracy: 0.787 test_accuracy: 0.765\n",
            "[2,   750] train_loss: 0.891 train_accuracy: 0.743 test_accuracy: 0.738\n",
            "[2,   755] train_loss: 1.002 train_accuracy: 0.757 test_accuracy: 0.768\n",
            "[2,   760] train_loss: 0.720 train_accuracy: 0.788 test_accuracy: 0.804\n",
            "[2,   765] train_loss: 0.666 train_accuracy: 0.788 test_accuracy: 0.811\n",
            "[2,   770] train_loss: 0.690 train_accuracy: 0.833 test_accuracy: 0.833\n",
            "[2,   775] train_loss: 0.606 train_accuracy: 0.844 test_accuracy: 0.841\n",
            "[2,   780] train_loss: 0.573 train_accuracy: 0.849 test_accuracy: 0.848\n",
            "[2,   785] train_loss: 0.820 train_accuracy: 0.849 test_accuracy: 0.843\n",
            "[2,   790] train_loss: 1.105 train_accuracy: 0.852 test_accuracy: 0.842\n",
            "[2,   795] train_loss: 0.357 train_accuracy: 0.834 test_accuracy: 0.818\n",
            "[2,   800] train_loss: 0.716 train_accuracy: 0.816 test_accuracy: 0.809\n",
            "[2,   805] train_loss: 0.447 train_accuracy: 0.819 test_accuracy: 0.812\n",
            "[2,   810] train_loss: 0.898 train_accuracy: 0.820 test_accuracy: 0.808\n",
            "[2,   815] train_loss: 0.803 train_accuracy: 0.825 test_accuracy: 0.823\n",
            "[2,   820] train_loss: 0.702 train_accuracy: 0.825 test_accuracy: 0.816\n",
            "[2,   825] train_loss: 0.619 train_accuracy: 0.806 test_accuracy: 0.819\n",
            "[2,   830] train_loss: 0.600 train_accuracy: 0.783 test_accuracy: 0.811\n",
            "[2,   835] train_loss: 0.918 train_accuracy: 0.814 test_accuracy: 0.833\n",
            "[2,   840] train_loss: 0.620 train_accuracy: 0.796 test_accuracy: 0.812\n",
            "[2,   845] train_loss: 0.580 train_accuracy: 0.786 test_accuracy: 0.793\n",
            "[2,   850] train_loss: 0.761 train_accuracy: 0.807 test_accuracy: 0.776\n",
            "[2,   855] train_loss: 0.442 train_accuracy: 0.784 test_accuracy: 0.741\n",
            "[2,   860] train_loss: 0.668 train_accuracy: 0.787 test_accuracy: 0.757\n",
            "[2,   865] train_loss: 0.365 train_accuracy: 0.826 test_accuracy: 0.829\n",
            "[2,   870] train_loss: 0.530 train_accuracy: 0.812 test_accuracy: 0.846\n",
            "[2,   875] train_loss: 0.595 train_accuracy: 0.797 test_accuracy: 0.845\n",
            "[2,   880] train_loss: 0.724 train_accuracy: 0.788 test_accuracy: 0.849\n",
            "[2,   885] train_loss: 0.657 train_accuracy: 0.759 test_accuracy: 0.814\n",
            "[2,   890] train_loss: 0.587 train_accuracy: 0.772 test_accuracy: 0.820\n",
            "[2,   895] train_loss: 0.578 train_accuracy: 0.811 test_accuracy: 0.816\n",
            "[2,   900] train_loss: 0.537 train_accuracy: 0.767 test_accuracy: 0.756\n",
            "[2,   905] train_loss: 0.613 train_accuracy: 0.779 test_accuracy: 0.775\n",
            "[2,   910] train_loss: 0.444 train_accuracy: 0.815 test_accuracy: 0.824\n",
            "[2,   915] train_loss: 0.828 train_accuracy: 0.790 test_accuracy: 0.831\n",
            "[2,   920] train_loss: 0.328 train_accuracy: 0.767 test_accuracy: 0.810\n",
            "[2,   925] train_loss: 0.841 train_accuracy: 0.782 test_accuracy: 0.815\n",
            "[2,   930] train_loss: 0.367 train_accuracy: 0.795 test_accuracy: 0.819\n",
            "[2,   935] train_loss: 0.554 train_accuracy: 0.807 test_accuracy: 0.823\n",
            "[2,   940] train_loss: 0.596 train_accuracy: 0.820 test_accuracy: 0.828\n",
            "[2,   945] train_loss: 0.425 train_accuracy: 0.821 test_accuracy: 0.820\n",
            "[2,   950] train_loss: 0.584 train_accuracy: 0.828 test_accuracy: 0.819\n",
            "[2,   955] train_loss: 0.436 train_accuracy: 0.847 test_accuracy: 0.838\n",
            "[2,   960] train_loss: 0.511 train_accuracy: 0.840 test_accuracy: 0.837\n",
            "[2,   965] train_loss: 0.602 train_accuracy: 0.842 test_accuracy: 0.844\n",
            "[2,   970] train_loss: 0.745 train_accuracy: 0.839 test_accuracy: 0.839\n",
            "[2,   975] train_loss: 0.420 train_accuracy: 0.830 test_accuracy: 0.817\n",
            "[2,   980] train_loss: 0.535 train_accuracy: 0.834 test_accuracy: 0.821\n",
            "[2,   985] train_loss: 0.628 train_accuracy: 0.852 test_accuracy: 0.846\n",
            "[2,   990] train_loss: 0.422 train_accuracy: 0.857 test_accuracy: 0.862\n",
            "[2,   995] train_loss: 0.642 train_accuracy: 0.853 test_accuracy: 0.859\n",
            "[2,  1000] train_loss: 0.415 train_accuracy: 0.842 test_accuracy: 0.846\n",
            "[2,  1005] train_loss: 0.291 train_accuracy: 0.820 test_accuracy: 0.826\n",
            "[2,  1010] train_loss: 0.573 train_accuracy: 0.842 test_accuracy: 0.831\n",
            "[2,  1015] train_loss: 0.428 train_accuracy: 0.845 test_accuracy: 0.829\n",
            "[2,  1020] train_loss: 0.492 train_accuracy: 0.840 test_accuracy: 0.829\n",
            "[2,  1025] train_loss: 0.552 train_accuracy: 0.854 test_accuracy: 0.849\n",
            "[2,  1030] train_loss: 0.499 train_accuracy: 0.863 test_accuracy: 0.864\n",
            "[2,  1035] train_loss: 0.377 train_accuracy: 0.858 test_accuracy: 0.867\n",
            "[2,  1040] train_loss: 0.867 train_accuracy: 0.853 test_accuracy: 0.862\n",
            "[2,  1045] train_loss: 0.389 train_accuracy: 0.845 test_accuracy: 0.847\n",
            "[2,  1050] train_loss: 0.540 train_accuracy: 0.842 test_accuracy: 0.838\n",
            "[2,  1055] train_loss: 0.566 train_accuracy: 0.860 test_accuracy: 0.855\n",
            "[2,  1060] train_loss: 0.222 train_accuracy: 0.865 test_accuracy: 0.853\n",
            "[2,  1065] train_loss: 0.189 train_accuracy: 0.857 test_accuracy: 0.843\n",
            "[2,  1070] train_loss: 0.960 train_accuracy: 0.852 test_accuracy: 0.837\n",
            "[2,  1075] train_loss: 0.456 train_accuracy: 0.834 test_accuracy: 0.818\n",
            "[2,  1080] train_loss: 0.617 train_accuracy: 0.807 test_accuracy: 0.805\n",
            "[2,  1085] train_loss: 0.523 train_accuracy: 0.785 test_accuracy: 0.804\n",
            "[2,  1090] train_loss: 0.782 train_accuracy: 0.839 test_accuracy: 0.827\n",
            "[2,  1095] train_loss: 0.472 train_accuracy: 0.851 test_accuracy: 0.833\n",
            "[2,  1100] train_loss: 0.233 train_accuracy: 0.836 test_accuracy: 0.825\n",
            "[2,  1105] train_loss: 0.506 train_accuracy: 0.842 test_accuracy: 0.838\n",
            "[2,  1110] train_loss: 0.812 train_accuracy: 0.850 test_accuracy: 0.845\n",
            "[2,  1115] train_loss: 0.210 train_accuracy: 0.837 test_accuracy: 0.851\n",
            "[2,  1120] train_loss: 0.370 train_accuracy: 0.804 test_accuracy: 0.836\n",
            "[2,  1125] train_loss: 0.665 train_accuracy: 0.780 test_accuracy: 0.809\n",
            "[2,  1130] train_loss: 0.779 train_accuracy: 0.770 test_accuracy: 0.779\n",
            "[2,  1135] train_loss: 0.582 train_accuracy: 0.781 test_accuracy: 0.777\n",
            "[2,  1140] train_loss: 2.078 train_accuracy: 0.808 test_accuracy: 0.812\n",
            "[2,  1145] train_loss: 0.635 train_accuracy: 0.811 test_accuracy: 0.821\n",
            "[2,  1150] train_loss: 0.463 train_accuracy: 0.793 test_accuracy: 0.818\n",
            "[2,  1155] train_loss: 0.653 train_accuracy: 0.774 test_accuracy: 0.801\n",
            "[2,  1160] train_loss: 0.748 train_accuracy: 0.755 test_accuracy: 0.791\n",
            "[2,  1165] train_loss: 0.452 train_accuracy: 0.766 test_accuracy: 0.799\n",
            "[2,  1170] train_loss: 0.681 train_accuracy: 0.799 test_accuracy: 0.806\n",
            "[2,  1175] train_loss: 0.823 train_accuracy: 0.792 test_accuracy: 0.781\n",
            "[2,  1180] train_loss: 0.672 train_accuracy: 0.803 test_accuracy: 0.782\n",
            "[2,  1185] train_loss: 0.615 train_accuracy: 0.822 test_accuracy: 0.795\n",
            "[2,  1190] train_loss: 0.682 train_accuracy: 0.832 test_accuracy: 0.810\n",
            "[2,  1195] train_loss: 0.375 train_accuracy: 0.837 test_accuracy: 0.822\n",
            "[2,  1200] train_loss: 0.258 train_accuracy: 0.848 test_accuracy: 0.841\n",
            "[2,  1205] train_loss: 0.513 train_accuracy: 0.855 test_accuracy: 0.853\n",
            "[2,  1210] train_loss: 0.331 train_accuracy: 0.825 test_accuracy: 0.839\n",
            "[2,  1215] train_loss: 0.654 train_accuracy: 0.828 test_accuracy: 0.836\n",
            "[2,  1220] train_loss: 0.547 train_accuracy: 0.829 test_accuracy: 0.857\n",
            "[2,  1225] train_loss: 0.312 train_accuracy: 0.828 test_accuracy: 0.864\n",
            "[2,  1230] train_loss: 0.515 train_accuracy: 0.823 test_accuracy: 0.857\n",
            "[2,  1235] train_loss: 0.243 train_accuracy: 0.813 test_accuracy: 0.845\n",
            "[2,  1240] train_loss: 0.313 train_accuracy: 0.811 test_accuracy: 0.839\n",
            "[2,  1245] train_loss: 0.513 train_accuracy: 0.832 test_accuracy: 0.842\n",
            "[2,  1250] train_loss: 0.571 train_accuracy: 0.825 test_accuracy: 0.826\n",
            "[2,  1255] train_loss: 0.403 train_accuracy: 0.815 test_accuracy: 0.798\n",
            "[2,  1260] train_loss: 0.598 train_accuracy: 0.840 test_accuracy: 0.814\n",
            "[2,  1265] train_loss: 0.660 train_accuracy: 0.843 test_accuracy: 0.823\n",
            "[2,  1270] train_loss: 0.555 train_accuracy: 0.834 test_accuracy: 0.815\n",
            "[2,  1275] train_loss: 0.634 train_accuracy: 0.816 test_accuracy: 0.798\n",
            "[2,  1280] train_loss: 0.806 train_accuracy: 0.819 test_accuracy: 0.800\n",
            "[2,  1285] train_loss: 0.373 train_accuracy: 0.824 test_accuracy: 0.812\n",
            "[2,  1290] train_loss: 0.469 train_accuracy: 0.830 test_accuracy: 0.810\n",
            "[2,  1295] train_loss: 0.126 train_accuracy: 0.828 test_accuracy: 0.806\n",
            "[2,  1300] train_loss: 0.636 train_accuracy: 0.830 test_accuracy: 0.804\n",
            "[2,  1305] train_loss: 0.652 train_accuracy: 0.835 test_accuracy: 0.810\n",
            "[2,  1310] train_loss: 0.454 train_accuracy: 0.833 test_accuracy: 0.819\n",
            "[2,  1315] train_loss: 0.644 train_accuracy: 0.828 test_accuracy: 0.816\n",
            "[2,  1320] train_loss: 0.892 train_accuracy: 0.845 test_accuracy: 0.825\n",
            "[2,  1325] train_loss: 0.435 train_accuracy: 0.834 test_accuracy: 0.817\n",
            "[2,  1330] train_loss: 0.583 train_accuracy: 0.834 test_accuracy: 0.813\n",
            "[2,  1335] train_loss: 0.771 train_accuracy: 0.831 test_accuracy: 0.808\n",
            "[2,  1340] train_loss: 0.425 train_accuracy: 0.834 test_accuracy: 0.811\n",
            "[2,  1345] train_loss: 0.530 train_accuracy: 0.842 test_accuracy: 0.822\n",
            "[2,  1350] train_loss: 0.744 train_accuracy: 0.838 test_accuracy: 0.825\n",
            "[2,  1355] train_loss: 0.410 train_accuracy: 0.819 test_accuracy: 0.805\n",
            "[2,  1360] train_loss: 0.692 train_accuracy: 0.804 test_accuracy: 0.790\n",
            "[2,  1365] train_loss: 0.369 train_accuracy: 0.823 test_accuracy: 0.808\n",
            "[2,  1370] train_loss: 1.058 train_accuracy: 0.831 test_accuracy: 0.808\n",
            "[2,  1375] train_loss: 0.415 train_accuracy: 0.803 test_accuracy: 0.777\n",
            "[2,  1380] train_loss: 0.724 train_accuracy: 0.801 test_accuracy: 0.777\n",
            "[2,  1385] train_loss: 0.891 train_accuracy: 0.823 test_accuracy: 0.809\n",
            "[2,  1390] train_loss: 0.613 train_accuracy: 0.865 test_accuracy: 0.859\n",
            "[2,  1395] train_loss: 0.451 train_accuracy: 0.858 test_accuracy: 0.873\n",
            "[2,  1400] train_loss: 0.432 train_accuracy: 0.852 test_accuracy: 0.873\n",
            "[2,  1405] train_loss: 0.307 train_accuracy: 0.853 test_accuracy: 0.870\n",
            "[2,  1410] train_loss: 0.286 train_accuracy: 0.874 test_accuracy: 0.879\n",
            "[2,  1415] train_loss: 0.368 train_accuracy: 0.872 test_accuracy: 0.870\n",
            "[2,  1420] train_loss: 0.076 train_accuracy: 0.852 test_accuracy: 0.856\n",
            "[2,  1425] train_loss: 0.513 train_accuracy: 0.838 test_accuracy: 0.848\n",
            "[2,  1430] train_loss: 0.675 train_accuracy: 0.850 test_accuracy: 0.854\n",
            "[2,  1435] train_loss: 0.365 train_accuracy: 0.857 test_accuracy: 0.843\n",
            "[2,  1440] train_loss: 0.770 train_accuracy: 0.843 test_accuracy: 0.840\n",
            "[2,  1445] train_loss: 0.761 train_accuracy: 0.852 test_accuracy: 0.847\n",
            "[2,  1450] train_loss: 0.367 train_accuracy: 0.867 test_accuracy: 0.859\n",
            "[2,  1455] train_loss: 0.238 train_accuracy: 0.883 test_accuracy: 0.870\n",
            "[2,  1460] train_loss: 0.243 train_accuracy: 0.878 test_accuracy: 0.867\n",
            "[2,  1465] train_loss: 0.536 train_accuracy: 0.879 test_accuracy: 0.871\n",
            "[2,  1470] train_loss: 1.119 train_accuracy: 0.850 test_accuracy: 0.844\n",
            "[2,  1475] train_loss: 0.390 train_accuracy: 0.865 test_accuracy: 0.865\n",
            "[2,  1480] train_loss: 0.563 train_accuracy: 0.860 test_accuracy: 0.867\n",
            "[2,  1485] train_loss: 0.631 train_accuracy: 0.864 test_accuracy: 0.870\n",
            "[2,  1490] train_loss: 0.416 train_accuracy: 0.854 test_accuracy: 0.867\n",
            "[2,  1495] train_loss: 0.255 train_accuracy: 0.846 test_accuracy: 0.851\n",
            "[2,  1500] train_loss: 0.994 train_accuracy: 0.832 test_accuracy: 0.840\n",
            "[2,  1505] train_loss: 0.640 train_accuracy: 0.817 test_accuracy: 0.831\n",
            "[2,  1510] train_loss: 0.368 train_accuracy: 0.825 test_accuracy: 0.828\n",
            "[2,  1515] train_loss: 1.273 train_accuracy: 0.838 test_accuracy: 0.832\n",
            "[2,  1520] train_loss: 0.259 train_accuracy: 0.840 test_accuracy: 0.841\n",
            "[2,  1525] train_loss: 0.752 train_accuracy: 0.826 test_accuracy: 0.826\n",
            "[2,  1530] train_loss: 0.698 train_accuracy: 0.820 test_accuracy: 0.806\n",
            "[2,  1535] train_loss: 0.633 train_accuracy: 0.840 test_accuracy: 0.820\n",
            "[2,  1540] train_loss: 0.482 train_accuracy: 0.852 test_accuracy: 0.840\n",
            "[2,  1545] train_loss: 0.383 train_accuracy: 0.846 test_accuracy: 0.839\n",
            "[2,  1550] train_loss: 0.676 train_accuracy: 0.848 test_accuracy: 0.839\n",
            "[2,  1555] train_loss: 0.550 train_accuracy: 0.855 test_accuracy: 0.846\n",
            "[2,  1560] train_loss: 0.564 train_accuracy: 0.866 test_accuracy: 0.860\n",
            "[2,  1565] train_loss: 0.402 train_accuracy: 0.867 test_accuracy: 0.864\n",
            "[2,  1570] train_loss: 0.728 train_accuracy: 0.865 test_accuracy: 0.859\n",
            "[2,  1575] train_loss: 0.475 train_accuracy: 0.855 test_accuracy: 0.861\n",
            "[2,  1580] train_loss: 0.395 train_accuracy: 0.825 test_accuracy: 0.841\n",
            "[2,  1585] train_loss: 0.716 train_accuracy: 0.829 test_accuracy: 0.821\n",
            "[2,  1590] train_loss: 0.533 train_accuracy: 0.836 test_accuracy: 0.820\n",
            "[2,  1595] train_loss: 0.479 train_accuracy: 0.840 test_accuracy: 0.823\n",
            "[2,  1600] train_loss: 0.514 train_accuracy: 0.843 test_accuracy: 0.831\n",
            "[2,  1605] train_loss: 0.285 train_accuracy: 0.830 test_accuracy: 0.828\n",
            "[2,  1610] train_loss: 0.294 train_accuracy: 0.826 test_accuracy: 0.826\n",
            "[2,  1615] train_loss: 0.577 train_accuracy: 0.831 test_accuracy: 0.828\n",
            "[2,  1620] train_loss: 0.561 train_accuracy: 0.845 test_accuracy: 0.853\n",
            "[2,  1625] train_loss: 0.747 train_accuracy: 0.848 test_accuracy: 0.858\n",
            "[2,  1630] train_loss: 0.511 train_accuracy: 0.856 test_accuracy: 0.865\n",
            "[2,  1635] train_loss: 0.585 train_accuracy: 0.852 test_accuracy: 0.859\n",
            "[2,  1640] train_loss: 0.445 train_accuracy: 0.828 test_accuracy: 0.825\n",
            "[2,  1645] train_loss: 0.798 train_accuracy: 0.817 test_accuracy: 0.809\n",
            "[2,  1650] train_loss: 0.653 train_accuracy: 0.813 test_accuracy: 0.803\n",
            "[2,  1655] train_loss: 1.025 train_accuracy: 0.819 test_accuracy: 0.813\n",
            "[2,  1660] train_loss: 1.038 train_accuracy: 0.855 test_accuracy: 0.842\n",
            "[2,  1665] train_loss: 0.106 train_accuracy: 0.872 test_accuracy: 0.858\n",
            "[2,  1670] train_loss: 0.524 train_accuracy: 0.869 test_accuracy: 0.855\n",
            "[2,  1675] train_loss: 0.550 train_accuracy: 0.866 test_accuracy: 0.857\n",
            "[2,  1680] train_loss: 0.669 train_accuracy: 0.846 test_accuracy: 0.848\n",
            "[2,  1685] train_loss: 0.537 train_accuracy: 0.846 test_accuracy: 0.837\n",
            "[2,  1690] train_loss: 0.216 train_accuracy: 0.836 test_accuracy: 0.825\n",
            "[2,  1695] train_loss: 0.705 train_accuracy: 0.824 test_accuracy: 0.811\n",
            "[2,  1700] train_loss: 0.647 train_accuracy: 0.846 test_accuracy: 0.829\n",
            "[2,  1705] train_loss: 0.607 train_accuracy: 0.856 test_accuracy: 0.834\n",
            "[2,  1710] train_loss: 0.350 train_accuracy: 0.850 test_accuracy: 0.830\n",
            "[2,  1715] train_loss: 0.417 train_accuracy: 0.840 test_accuracy: 0.828\n",
            "[2,  1720] train_loss: 0.758 train_accuracy: 0.841 test_accuracy: 0.828\n",
            "[2,  1725] train_loss: 0.230 train_accuracy: 0.847 test_accuracy: 0.827\n",
            "[3,     5] train_loss: 0.639 train_accuracy: 0.843 test_accuracy: 0.832\n",
            "[3,    10] train_loss: 0.590 train_accuracy: 0.847 test_accuracy: 0.835\n",
            "[3,    15] train_loss: 0.932 train_accuracy: 0.835 test_accuracy: 0.822\n",
            "[3,    20] train_loss: 1.080 train_accuracy: 0.803 test_accuracy: 0.783\n",
            "[3,    25] train_loss: 1.300 train_accuracy: 0.847 test_accuracy: 0.828\n",
            "[3,    30] train_loss: 0.483 train_accuracy: 0.844 test_accuracy: 0.828\n",
            "[3,    35] train_loss: 0.523 train_accuracy: 0.831 test_accuracy: 0.827\n",
            "[3,    40] train_loss: 0.350 train_accuracy: 0.821 test_accuracy: 0.824\n",
            "[3,    45] train_loss: 0.545 train_accuracy: 0.823 test_accuracy: 0.822\n",
            "[3,    50] train_loss: 0.281 train_accuracy: 0.827 test_accuracy: 0.817\n",
            "[3,    55] train_loss: 0.382 train_accuracy: 0.835 test_accuracy: 0.823\n",
            "[3,    60] train_loss: 0.156 train_accuracy: 0.833 test_accuracy: 0.833\n",
            "[3,    65] train_loss: 0.428 train_accuracy: 0.847 test_accuracy: 0.832\n",
            "[3,    70] train_loss: 0.745 train_accuracy: 0.854 test_accuracy: 0.839\n",
            "[3,    75] train_loss: 0.305 train_accuracy: 0.850 test_accuracy: 0.832\n",
            "[3,    80] train_loss: 0.217 train_accuracy: 0.844 test_accuracy: 0.825\n",
            "[3,    85] train_loss: 0.472 train_accuracy: 0.833 test_accuracy: 0.808\n",
            "[3,    90] train_loss: 0.575 train_accuracy: 0.843 test_accuracy: 0.820\n",
            "[3,    95] train_loss: 0.653 train_accuracy: 0.840 test_accuracy: 0.822\n",
            "[3,   100] train_loss: 0.450 train_accuracy: 0.860 test_accuracy: 0.851\n",
            "[3,   105] train_loss: 0.234 train_accuracy: 0.867 test_accuracy: 0.861\n",
            "[3,   110] train_loss: 0.354 train_accuracy: 0.873 test_accuracy: 0.868\n",
            "[3,   115] train_loss: 0.563 train_accuracy: 0.869 test_accuracy: 0.858\n",
            "[3,   120] train_loss: 0.509 train_accuracy: 0.853 test_accuracy: 0.860\n",
            "[3,   125] train_loss: 0.360 train_accuracy: 0.870 test_accuracy: 0.870\n",
            "[3,   130] train_loss: 0.430 train_accuracy: 0.873 test_accuracy: 0.885\n",
            "[3,   135] train_loss: 0.263 train_accuracy: 0.867 test_accuracy: 0.884\n",
            "[3,   140] train_loss: 0.631 train_accuracy: 0.886 test_accuracy: 0.874\n",
            "[3,   145] train_loss: 0.675 train_accuracy: 0.871 test_accuracy: 0.847\n",
            "[3,   150] train_loss: 0.489 train_accuracy: 0.863 test_accuracy: 0.838\n",
            "[3,   155] train_loss: 0.302 train_accuracy: 0.855 test_accuracy: 0.831\n",
            "[3,   160] train_loss: 0.542 train_accuracy: 0.875 test_accuracy: 0.858\n",
            "[3,   165] train_loss: 0.672 train_accuracy: 0.862 test_accuracy: 0.855\n",
            "[3,   170] train_loss: 0.894 train_accuracy: 0.848 test_accuracy: 0.842\n",
            "[3,   175] train_loss: 0.357 train_accuracy: 0.854 test_accuracy: 0.839\n",
            "[3,   180] train_loss: 1.176 train_accuracy: 0.867 test_accuracy: 0.851\n",
            "[3,   185] train_loss: 0.253 train_accuracy: 0.861 test_accuracy: 0.849\n",
            "[3,   190] train_loss: 0.517 train_accuracy: 0.848 test_accuracy: 0.842\n",
            "[3,   195] train_loss: 0.388 train_accuracy: 0.864 test_accuracy: 0.855\n",
            "[3,   200] train_loss: 0.210 train_accuracy: 0.870 test_accuracy: 0.852\n",
            "[3,   205] train_loss: 0.784 train_accuracy: 0.871 test_accuracy: 0.851\n",
            "[3,   210] train_loss: 0.500 train_accuracy: 0.866 test_accuracy: 0.844\n",
            "[3,   215] train_loss: 0.657 train_accuracy: 0.861 test_accuracy: 0.837\n",
            "[3,   220] train_loss: 0.494 train_accuracy: 0.847 test_accuracy: 0.832\n",
            "[3,   225] train_loss: 0.710 train_accuracy: 0.865 test_accuracy: 0.856\n",
            "[3,   230] train_loss: 0.511 train_accuracy: 0.871 test_accuracy: 0.852\n",
            "[3,   235] train_loss: 0.109 train_accuracy: 0.863 test_accuracy: 0.839\n",
            "[3,   240] train_loss: 0.176 train_accuracy: 0.857 test_accuracy: 0.832\n",
            "[3,   245] train_loss: 0.270 train_accuracy: 0.851 test_accuracy: 0.824\n",
            "[3,   250] train_loss: 0.425 train_accuracy: 0.873 test_accuracy: 0.851\n",
            "[3,   255] train_loss: 0.324 train_accuracy: 0.834 test_accuracy: 0.854\n",
            "[3,   260] train_loss: 0.582 train_accuracy: 0.805 test_accuracy: 0.847\n",
            "[3,   265] train_loss: 0.731 train_accuracy: 0.811 test_accuracy: 0.850\n",
            "[3,   270] train_loss: 0.453 train_accuracy: 0.834 test_accuracy: 0.851\n",
            "[3,   275] train_loss: 0.372 train_accuracy: 0.831 test_accuracy: 0.844\n",
            "[3,   280] train_loss: 0.559 train_accuracy: 0.837 test_accuracy: 0.841\n",
            "[3,   285] train_loss: 0.312 train_accuracy: 0.852 test_accuracy: 0.829\n",
            "[3,   290] train_loss: 0.626 train_accuracy: 0.842 test_accuracy: 0.816\n",
            "[3,   295] train_loss: 0.662 train_accuracy: 0.849 test_accuracy: 0.826\n",
            "[3,   300] train_loss: 1.014 train_accuracy: 0.838 test_accuracy: 0.820\n",
            "[3,   305] train_loss: 0.746 train_accuracy: 0.813 test_accuracy: 0.810\n",
            "[3,   310] train_loss: 0.770 train_accuracy: 0.806 test_accuracy: 0.806\n",
            "[3,   315] train_loss: 0.540 train_accuracy: 0.824 test_accuracy: 0.823\n",
            "[3,   320] train_loss: 0.370 train_accuracy: 0.832 test_accuracy: 0.833\n",
            "[3,   325] train_loss: 0.965 train_accuracy: 0.828 test_accuracy: 0.837\n",
            "[3,   330] train_loss: 0.411 train_accuracy: 0.850 test_accuracy: 0.853\n",
            "[3,   335] train_loss: 0.399 train_accuracy: 0.858 test_accuracy: 0.856\n",
            "[3,   340] train_loss: 0.661 train_accuracy: 0.864 test_accuracy: 0.864\n",
            "[3,   345] train_loss: 0.527 train_accuracy: 0.866 test_accuracy: 0.864\n",
            "[3,   350] train_loss: 0.314 train_accuracy: 0.868 test_accuracy: 0.860\n",
            "[3,   355] train_loss: 0.424 train_accuracy: 0.870 test_accuracy: 0.855\n",
            "[3,   360] train_loss: 0.148 train_accuracy: 0.861 test_accuracy: 0.845\n",
            "[3,   365] train_loss: 0.633 train_accuracy: 0.865 test_accuracy: 0.850\n",
            "[3,   370] train_loss: 0.408 train_accuracy: 0.870 test_accuracy: 0.860\n",
            "[3,   375] train_loss: 0.616 train_accuracy: 0.881 test_accuracy: 0.869\n",
            "[3,   380] train_loss: 0.742 train_accuracy: 0.881 test_accuracy: 0.873\n",
            "[3,   385] train_loss: 0.245 train_accuracy: 0.866 test_accuracy: 0.859\n",
            "[3,   390] train_loss: 0.290 train_accuracy: 0.864 test_accuracy: 0.857\n",
            "[3,   395] train_loss: 0.397 train_accuracy: 0.867 test_accuracy: 0.862\n",
            "[3,   400] train_loss: 0.307 train_accuracy: 0.878 test_accuracy: 0.868\n",
            "[3,   405] train_loss: 0.303 train_accuracy: 0.887 test_accuracy: 0.877\n",
            "[3,   410] train_loss: 0.471 train_accuracy: 0.879 test_accuracy: 0.882\n",
            "[3,   415] train_loss: 0.172 train_accuracy: 0.866 test_accuracy: 0.883\n",
            "[3,   420] train_loss: 0.355 train_accuracy: 0.856 test_accuracy: 0.877\n",
            "[3,   425] train_loss: 0.278 train_accuracy: 0.864 test_accuracy: 0.875\n",
            "[3,   430] train_loss: 0.400 train_accuracy: 0.860 test_accuracy: 0.868\n",
            "[3,   435] train_loss: 0.561 train_accuracy: 0.851 test_accuracy: 0.859\n",
            "[3,   440] train_loss: 0.462 train_accuracy: 0.867 test_accuracy: 0.865\n",
            "[3,   445] train_loss: 0.600 train_accuracy: 0.863 test_accuracy: 0.860\n",
            "[3,   450] train_loss: 0.594 train_accuracy: 0.856 test_accuracy: 0.849\n",
            "[3,   455] train_loss: 0.475 train_accuracy: 0.853 test_accuracy: 0.850\n",
            "[3,   460] train_loss: 0.674 train_accuracy: 0.836 test_accuracy: 0.842\n",
            "[3,   465] train_loss: 0.396 train_accuracy: 0.831 test_accuracy: 0.832\n",
            "[3,   470] train_loss: 0.819 train_accuracy: 0.875 test_accuracy: 0.861\n",
            "[3,   475] train_loss: 0.532 train_accuracy: 0.877 test_accuracy: 0.860\n",
            "[3,   480] train_loss: 0.657 train_accuracy: 0.857 test_accuracy: 0.842\n",
            "[3,   485] train_loss: 0.290 train_accuracy: 0.842 test_accuracy: 0.826\n",
            "[3,   490] train_loss: 0.743 train_accuracy: 0.846 test_accuracy: 0.826\n",
            "[3,   495] train_loss: 0.499 train_accuracy: 0.833 test_accuracy: 0.802\n",
            "[3,   500] train_loss: 1.169 train_accuracy: 0.872 test_accuracy: 0.853\n",
            "[3,   505] train_loss: 0.196 train_accuracy: 0.856 test_accuracy: 0.863\n",
            "[3,   510] train_loss: 0.506 train_accuracy: 0.882 test_accuracy: 0.875\n",
            "[3,   515] train_loss: 0.262 train_accuracy: 0.880 test_accuracy: 0.872\n",
            "[3,   520] train_loss: 0.337 train_accuracy: 0.882 test_accuracy: 0.870\n",
            "[3,   525] train_loss: 1.024 train_accuracy: 0.868 test_accuracy: 0.851\n",
            "[3,   530] train_loss: 0.434 train_accuracy: 0.844 test_accuracy: 0.821\n",
            "[3,   535] train_loss: 0.850 train_accuracy: 0.827 test_accuracy: 0.807\n",
            "[3,   540] train_loss: 0.575 train_accuracy: 0.822 test_accuracy: 0.803\n",
            "[3,   545] train_loss: 0.689 train_accuracy: 0.830 test_accuracy: 0.812\n",
            "[3,   550] train_loss: 0.607 train_accuracy: 0.834 test_accuracy: 0.819\n",
            "[3,   555] train_loss: 0.348 train_accuracy: 0.838 test_accuracy: 0.827\n",
            "[3,   560] train_loss: 0.272 train_accuracy: 0.846 test_accuracy: 0.837\n",
            "[3,   565] train_loss: 0.566 train_accuracy: 0.856 test_accuracy: 0.850\n",
            "[3,   570] train_loss: 0.725 train_accuracy: 0.854 test_accuracy: 0.842\n",
            "[3,   575] train_loss: 0.341 train_accuracy: 0.855 test_accuracy: 0.839\n",
            "[3,   580] train_loss: 0.512 train_accuracy: 0.861 test_accuracy: 0.851\n",
            "[3,   585] train_loss: 0.198 train_accuracy: 0.857 test_accuracy: 0.862\n",
            "[3,   590] train_loss: 0.428 train_accuracy: 0.840 test_accuracy: 0.856\n",
            "[3,   595] train_loss: 0.837 train_accuracy: 0.849 test_accuracy: 0.866\n",
            "[3,   600] train_loss: 0.329 train_accuracy: 0.860 test_accuracy: 0.870\n",
            "[3,   605] train_loss: 0.302 train_accuracy: 0.863 test_accuracy: 0.868\n",
            "[3,   610] train_loss: 0.609 train_accuracy: 0.866 test_accuracy: 0.871\n",
            "[3,   615] train_loss: 0.236 train_accuracy: 0.870 test_accuracy: 0.861\n",
            "[3,   620] train_loss: 0.296 train_accuracy: 0.870 test_accuracy: 0.851\n",
            "[3,   625] train_loss: 0.254 train_accuracy: 0.863 test_accuracy: 0.842\n",
            "[3,   630] train_loss: 0.414 train_accuracy: 0.862 test_accuracy: 0.841\n",
            "[3,   635] train_loss: 0.315 train_accuracy: 0.849 test_accuracy: 0.821\n",
            "[3,   640] train_loss: 0.894 train_accuracy: 0.865 test_accuracy: 0.846\n",
            "[3,   645] train_loss: 0.448 train_accuracy: 0.863 test_accuracy: 0.869\n",
            "[3,   650] train_loss: 0.690 train_accuracy: 0.880 test_accuracy: 0.871\n",
            "[3,   655] train_loss: 0.908 train_accuracy: 0.877 test_accuracy: 0.864\n",
            "[3,   660] train_loss: 0.412 train_accuracy: 0.857 test_accuracy: 0.844\n",
            "[3,   665] train_loss: 0.371 train_accuracy: 0.871 test_accuracy: 0.851\n",
            "[3,   670] train_loss: 0.503 train_accuracy: 0.882 test_accuracy: 0.860\n",
            "[3,   675] train_loss: 0.321 train_accuracy: 0.886 test_accuracy: 0.862\n",
            "[3,   680] train_loss: 0.280 train_accuracy: 0.882 test_accuracy: 0.861\n",
            "[3,   685] train_loss: 0.357 train_accuracy: 0.876 test_accuracy: 0.862\n",
            "[3,   690] train_loss: 0.850 train_accuracy: 0.869 test_accuracy: 0.855\n",
            "[3,   695] train_loss: 0.604 train_accuracy: 0.868 test_accuracy: 0.856\n",
            "[3,   700] train_loss: 0.705 train_accuracy: 0.873 test_accuracy: 0.868\n",
            "[3,   705] train_loss: 0.074 train_accuracy: 0.876 test_accuracy: 0.872\n",
            "[3,   710] train_loss: 0.486 train_accuracy: 0.890 test_accuracy: 0.876\n",
            "[3,   715] train_loss: 0.669 train_accuracy: 0.888 test_accuracy: 0.871\n",
            "[3,   720] train_loss: 0.117 train_accuracy: 0.881 test_accuracy: 0.866\n",
            "[3,   725] train_loss: 0.337 train_accuracy: 0.881 test_accuracy: 0.861\n",
            "[3,   730] train_loss: 0.403 train_accuracy: 0.873 test_accuracy: 0.854\n",
            "[3,   735] train_loss: 0.511 train_accuracy: 0.869 test_accuracy: 0.856\n",
            "[3,   740] train_loss: 0.645 train_accuracy: 0.854 test_accuracy: 0.849\n",
            "[3,   745] train_loss: 0.575 train_accuracy: 0.844 test_accuracy: 0.837\n",
            "[3,   750] train_loss: 0.478 train_accuracy: 0.841 test_accuracy: 0.839\n",
            "[3,   755] train_loss: 0.058 train_accuracy: 0.850 test_accuracy: 0.852\n",
            "[3,   760] train_loss: 0.471 train_accuracy: 0.838 test_accuracy: 0.849\n",
            "[3,   765] train_loss: 1.090 train_accuracy: 0.864 test_accuracy: 0.867\n",
            "[3,   770] train_loss: 0.582 train_accuracy: 0.859 test_accuracy: 0.857\n",
            "[3,   775] train_loss: 0.179 train_accuracy: 0.862 test_accuracy: 0.851\n",
            "[3,   780] train_loss: 0.652 train_accuracy: 0.860 test_accuracy: 0.843\n",
            "[3,   785] train_loss: 0.128 train_accuracy: 0.858 test_accuracy: 0.837\n",
            "[3,   790] train_loss: 0.266 train_accuracy: 0.860 test_accuracy: 0.837\n",
            "[3,   795] train_loss: 0.448 train_accuracy: 0.865 test_accuracy: 0.849\n",
            "[3,   800] train_loss: 0.270 train_accuracy: 0.867 test_accuracy: 0.859\n",
            "[3,   805] train_loss: 0.779 train_accuracy: 0.876 test_accuracy: 0.868\n",
            "[3,   810] train_loss: 0.548 train_accuracy: 0.874 test_accuracy: 0.869\n",
            "[3,   815] train_loss: 0.641 train_accuracy: 0.861 test_accuracy: 0.862\n",
            "[3,   820] train_loss: 0.633 train_accuracy: 0.867 test_accuracy: 0.869\n",
            "[3,   825] train_loss: 0.407 train_accuracy: 0.885 test_accuracy: 0.869\n",
            "[3,   830] train_loss: 0.290 train_accuracy: 0.882 test_accuracy: 0.854\n",
            "[3,   835] train_loss: 0.357 train_accuracy: 0.872 test_accuracy: 0.847\n",
            "[3,   840] train_loss: 0.299 train_accuracy: 0.873 test_accuracy: 0.850\n",
            "[3,   845] train_loss: 0.387 train_accuracy: 0.880 test_accuracy: 0.854\n",
            "[3,   850] train_loss: 0.258 train_accuracy: 0.868 test_accuracy: 0.842\n",
            "[3,   855] train_loss: 0.055 train_accuracy: 0.850 test_accuracy: 0.823\n",
            "[3,   860] train_loss: 0.576 train_accuracy: 0.850 test_accuracy: 0.825\n",
            "[3,   865] train_loss: 0.814 train_accuracy: 0.874 test_accuracy: 0.850\n",
            "[3,   870] train_loss: 0.375 train_accuracy: 0.882 test_accuracy: 0.869\n",
            "[3,   875] train_loss: 0.612 train_accuracy: 0.881 test_accuracy: 0.876\n",
            "[3,   880] train_loss: 0.466 train_accuracy: 0.891 test_accuracy: 0.877\n",
            "[3,   885] train_loss: 0.849 train_accuracy: 0.884 test_accuracy: 0.879\n",
            "[3,   890] train_loss: 0.438 train_accuracy: 0.881 test_accuracy: 0.882\n",
            "[3,   895] train_loss: 0.689 train_accuracy: 0.876 test_accuracy: 0.882\n",
            "[3,   900] train_loss: 1.183 train_accuracy: 0.864 test_accuracy: 0.869\n",
            "[3,   905] train_loss: 0.593 train_accuracy: 0.857 test_accuracy: 0.848\n",
            "[3,   910] train_loss: 0.616 train_accuracy: 0.856 test_accuracy: 0.842\n",
            "[3,   915] train_loss: 0.664 train_accuracy: 0.858 test_accuracy: 0.846\n",
            "[3,   920] train_loss: 1.067 train_accuracy: 0.866 test_accuracy: 0.856\n",
            "[3,   925] train_loss: 0.405 train_accuracy: 0.869 test_accuracy: 0.866\n",
            "[3,   930] train_loss: 0.692 train_accuracy: 0.869 test_accuracy: 0.868\n",
            "[3,   935] train_loss: 0.225 train_accuracy: 0.865 test_accuracy: 0.867\n",
            "[3,   940] train_loss: 0.390 train_accuracy: 0.871 test_accuracy: 0.865\n",
            "[3,   945] train_loss: 0.359 train_accuracy: 0.876 test_accuracy: 0.864\n",
            "[3,   950] train_loss: 0.305 train_accuracy: 0.879 test_accuracy: 0.862\n",
            "[3,   955] train_loss: 0.687 train_accuracy: 0.878 test_accuracy: 0.858\n",
            "[3,   960] train_loss: 0.269 train_accuracy: 0.863 test_accuracy: 0.840\n",
            "[3,   965] train_loss: 0.356 train_accuracy: 0.857 test_accuracy: 0.836\n",
            "[3,   970] train_loss: 0.309 train_accuracy: 0.859 test_accuracy: 0.843\n",
            "[3,   975] train_loss: 0.433 train_accuracy: 0.866 test_accuracy: 0.852\n",
            "[3,   980] train_loss: 0.445 train_accuracy: 0.861 test_accuracy: 0.858\n",
            "[3,   985] train_loss: 0.674 train_accuracy: 0.839 test_accuracy: 0.860\n",
            "[3,   990] train_loss: 0.907 train_accuracy: 0.841 test_accuracy: 0.873\n",
            "[3,   995] train_loss: 0.526 train_accuracy: 0.830 test_accuracy: 0.858\n",
            "[3,  1000] train_loss: 0.487 train_accuracy: 0.832 test_accuracy: 0.843\n",
            "[3,  1005] train_loss: 0.299 train_accuracy: 0.866 test_accuracy: 0.861\n",
            "[3,  1010] train_loss: 0.313 train_accuracy: 0.869 test_accuracy: 0.857\n",
            "[3,  1015] train_loss: 0.444 train_accuracy: 0.871 test_accuracy: 0.854\n",
            "[3,  1020] train_loss: 0.422 train_accuracy: 0.888 test_accuracy: 0.868\n",
            "[3,  1025] train_loss: 0.744 train_accuracy: 0.891 test_accuracy: 0.875\n",
            "[3,  1030] train_loss: 0.232 train_accuracy: 0.885 test_accuracy: 0.868\n",
            "[3,  1035] train_loss: 0.567 train_accuracy: 0.882 test_accuracy: 0.865\n",
            "[3,  1040] train_loss: 0.173 train_accuracy: 0.884 test_accuracy: 0.868\n",
            "[3,  1045] train_loss: 0.296 train_accuracy: 0.879 test_accuracy: 0.876\n",
            "[3,  1050] train_loss: 0.401 train_accuracy: 0.891 test_accuracy: 0.878\n",
            "[3,  1055] train_loss: 0.306 train_accuracy: 0.894 test_accuracy: 0.882\n",
            "[3,  1060] train_loss: 0.386 train_accuracy: 0.883 test_accuracy: 0.869\n",
            "[3,  1065] train_loss: 0.268 train_accuracy: 0.877 test_accuracy: 0.860\n",
            "[3,  1070] train_loss: 0.410 train_accuracy: 0.879 test_accuracy: 0.863\n",
            "[3,  1075] train_loss: 0.752 train_accuracy: 0.870 test_accuracy: 0.853\n",
            "[3,  1080] train_loss: 0.365 train_accuracy: 0.875 test_accuracy: 0.851\n",
            "[3,  1085] train_loss: 0.622 train_accuracy: 0.889 test_accuracy: 0.868\n",
            "[3,  1090] train_loss: 0.479 train_accuracy: 0.892 test_accuracy: 0.877\n",
            "[3,  1095] train_loss: 0.651 train_accuracy: 0.889 test_accuracy: 0.881\n",
            "[3,  1100] train_loss: 0.594 train_accuracy: 0.886 test_accuracy: 0.881\n",
            "[3,  1105] train_loss: 0.323 train_accuracy: 0.885 test_accuracy: 0.890\n",
            "[3,  1110] train_loss: 0.336 train_accuracy: 0.874 test_accuracy: 0.890\n",
            "[3,  1115] train_loss: 0.493 train_accuracy: 0.867 test_accuracy: 0.887\n",
            "[3,  1120] train_loss: 0.582 train_accuracy: 0.879 test_accuracy: 0.882\n",
            "[3,  1125] train_loss: 0.587 train_accuracy: 0.887 test_accuracy: 0.870\n",
            "[3,  1130] train_loss: 0.628 train_accuracy: 0.883 test_accuracy: 0.861\n",
            "[3,  1135] train_loss: 0.819 train_accuracy: 0.883 test_accuracy: 0.862\n",
            "[3,  1140] train_loss: 0.141 train_accuracy: 0.886 test_accuracy: 0.869\n",
            "[3,  1145] train_loss: 0.117 train_accuracy: 0.884 test_accuracy: 0.862\n",
            "[3,  1150] train_loss: 0.508 train_accuracy: 0.888 test_accuracy: 0.868\n",
            "[3,  1155] train_loss: 0.276 train_accuracy: 0.889 test_accuracy: 0.871\n",
            "[3,  1160] train_loss: 0.305 train_accuracy: 0.881 test_accuracy: 0.863\n",
            "[3,  1165] train_loss: 0.847 train_accuracy: 0.877 test_accuracy: 0.852\n",
            "[3,  1170] train_loss: 0.217 train_accuracy: 0.879 test_accuracy: 0.855\n",
            "[3,  1175] train_loss: 0.591 train_accuracy: 0.888 test_accuracy: 0.863\n",
            "[3,  1180] train_loss: 0.681 train_accuracy: 0.889 test_accuracy: 0.863\n",
            "[3,  1185] train_loss: 0.236 train_accuracy: 0.883 test_accuracy: 0.858\n",
            "[3,  1190] train_loss: 0.647 train_accuracy: 0.879 test_accuracy: 0.859\n",
            "[3,  1195] train_loss: 0.656 train_accuracy: 0.870 test_accuracy: 0.843\n",
            "[3,  1200] train_loss: 0.445 train_accuracy: 0.874 test_accuracy: 0.850\n",
            "[3,  1205] train_loss: 0.347 train_accuracy: 0.871 test_accuracy: 0.854\n",
            "[3,  1210] train_loss: 0.408 train_accuracy: 0.877 test_accuracy: 0.859\n",
            "[3,  1215] train_loss: 0.127 train_accuracy: 0.886 test_accuracy: 0.868\n",
            "[3,  1220] train_loss: 0.386 train_accuracy: 0.898 test_accuracy: 0.874\n",
            "[3,  1225] train_loss: 0.255 train_accuracy: 0.894 test_accuracy: 0.862\n",
            "[3,  1230] train_loss: 0.545 train_accuracy: 0.886 test_accuracy: 0.849\n",
            "[3,  1235] train_loss: 0.496 train_accuracy: 0.893 test_accuracy: 0.863\n",
            "[3,  1240] train_loss: 0.587 train_accuracy: 0.897 test_accuracy: 0.877\n",
            "[3,  1245] train_loss: 0.658 train_accuracy: 0.892 test_accuracy: 0.878\n",
            "[3,  1250] train_loss: 0.742 train_accuracy: 0.885 test_accuracy: 0.870\n",
            "[3,  1255] train_loss: 0.477 train_accuracy: 0.880 test_accuracy: 0.861\n",
            "[3,  1260] train_loss: 0.370 train_accuracy: 0.876 test_accuracy: 0.862\n",
            "[3,  1265] train_loss: 0.396 train_accuracy: 0.862 test_accuracy: 0.864\n",
            "[3,  1270] train_loss: 0.446 train_accuracy: 0.826 test_accuracy: 0.840\n",
            "[3,  1275] train_loss: 0.497 train_accuracy: 0.813 test_accuracy: 0.833\n",
            "[3,  1280] train_loss: 0.497 train_accuracy: 0.829 test_accuracy: 0.849\n",
            "[3,  1285] train_loss: 0.636 train_accuracy: 0.865 test_accuracy: 0.866\n",
            "[3,  1290] train_loss: 0.414 train_accuracy: 0.859 test_accuracy: 0.861\n",
            "[3,  1295] train_loss: 0.608 train_accuracy: 0.856 test_accuracy: 0.839\n",
            "[3,  1300] train_loss: 0.726 train_accuracy: 0.868 test_accuracy: 0.848\n",
            "[3,  1305] train_loss: 0.561 train_accuracy: 0.833 test_accuracy: 0.817\n",
            "[3,  1310] train_loss: 0.195 train_accuracy: 0.864 test_accuracy: 0.851\n",
            "[3,  1315] train_loss: 0.403 train_accuracy: 0.877 test_accuracy: 0.859\n",
            "[3,  1320] train_loss: 0.508 train_accuracy: 0.883 test_accuracy: 0.857\n",
            "[3,  1325] train_loss: 0.377 train_accuracy: 0.877 test_accuracy: 0.847\n",
            "[3,  1330] train_loss: 0.605 train_accuracy: 0.887 test_accuracy: 0.855\n",
            "[3,  1335] train_loss: 1.063 train_accuracy: 0.889 test_accuracy: 0.861\n",
            "[3,  1340] train_loss: 0.562 train_accuracy: 0.862 test_accuracy: 0.855\n",
            "[3,  1345] train_loss: 0.542 train_accuracy: 0.851 test_accuracy: 0.847\n",
            "[3,  1350] train_loss: 0.597 train_accuracy: 0.875 test_accuracy: 0.856\n",
            "[3,  1355] train_loss: 0.900 train_accuracy: 0.874 test_accuracy: 0.849\n",
            "[3,  1360] train_loss: 0.796 train_accuracy: 0.872 test_accuracy: 0.851\n",
            "[3,  1365] train_loss: 0.263 train_accuracy: 0.873 test_accuracy: 0.856\n",
            "[3,  1370] train_loss: 0.455 train_accuracy: 0.882 test_accuracy: 0.868\n",
            "[3,  1375] train_loss: 0.481 train_accuracy: 0.890 test_accuracy: 0.878\n",
            "[3,  1380] train_loss: 0.555 train_accuracy: 0.898 test_accuracy: 0.886\n",
            "[3,  1385] train_loss: 0.493 train_accuracy: 0.901 test_accuracy: 0.886\n",
            "[3,  1390] train_loss: 0.507 train_accuracy: 0.905 test_accuracy: 0.884\n",
            "[3,  1395] train_loss: 0.379 train_accuracy: 0.901 test_accuracy: 0.886\n",
            "[3,  1400] train_loss: 0.083 train_accuracy: 0.895 test_accuracy: 0.880\n",
            "[3,  1405] train_loss: 0.572 train_accuracy: 0.870 test_accuracy: 0.862\n",
            "[3,  1410] train_loss: 0.237 train_accuracy: 0.870 test_accuracy: 0.863\n",
            "[3,  1415] train_loss: 0.191 train_accuracy: 0.890 test_accuracy: 0.874\n",
            "[3,  1420] train_loss: 0.137 train_accuracy: 0.901 test_accuracy: 0.886\n",
            "[3,  1425] train_loss: 0.221 train_accuracy: 0.905 test_accuracy: 0.891\n",
            "[3,  1430] train_loss: 0.229 train_accuracy: 0.904 test_accuracy: 0.892\n",
            "[3,  1435] train_loss: 0.325 train_accuracy: 0.908 test_accuracy: 0.890\n",
            "[3,  1440] train_loss: 0.961 train_accuracy: 0.912 test_accuracy: 0.885\n",
            "[3,  1445] train_loss: 0.040 train_accuracy: 0.884 test_accuracy: 0.850\n",
            "[3,  1450] train_loss: 0.473 train_accuracy: 0.863 test_accuracy: 0.831\n",
            "[3,  1455] train_loss: 0.037 train_accuracy: 0.857 test_accuracy: 0.825\n",
            "[3,  1460] train_loss: 0.348 train_accuracy: 0.860 test_accuracy: 0.829\n",
            "[3,  1465] train_loss: 0.202 train_accuracy: 0.858 test_accuracy: 0.837\n",
            "[3,  1470] train_loss: 0.101 train_accuracy: 0.862 test_accuracy: 0.847\n",
            "[3,  1475] train_loss: 0.710 train_accuracy: 0.864 test_accuracy: 0.848\n",
            "[3,  1480] train_loss: 0.489 train_accuracy: 0.869 test_accuracy: 0.864\n",
            "[3,  1485] train_loss: 0.336 train_accuracy: 0.870 test_accuracy: 0.865\n",
            "[3,  1490] train_loss: 0.459 train_accuracy: 0.874 test_accuracy: 0.866\n",
            "[3,  1495] train_loss: 0.600 train_accuracy: 0.880 test_accuracy: 0.871\n",
            "[3,  1500] train_loss: 0.577 train_accuracy: 0.882 test_accuracy: 0.863\n",
            "[3,  1505] train_loss: 0.221 train_accuracy: 0.873 test_accuracy: 0.849\n",
            "[3,  1510] train_loss: 0.393 train_accuracy: 0.878 test_accuracy: 0.855\n",
            "[3,  1515] train_loss: 0.365 train_accuracy: 0.888 test_accuracy: 0.864\n",
            "[3,  1520] train_loss: 0.342 train_accuracy: 0.887 test_accuracy: 0.867\n",
            "[3,  1525] train_loss: 0.104 train_accuracy: 0.884 test_accuracy: 0.862\n",
            "[3,  1530] train_loss: 0.462 train_accuracy: 0.888 test_accuracy: 0.872\n",
            "[3,  1535] train_loss: 0.147 train_accuracy: 0.890 test_accuracy: 0.875\n",
            "[3,  1540] train_loss: 0.230 train_accuracy: 0.888 test_accuracy: 0.877\n",
            "[3,  1545] train_loss: 0.451 train_accuracy: 0.896 test_accuracy: 0.884\n",
            "[3,  1550] train_loss: 0.229 train_accuracy: 0.903 test_accuracy: 0.895\n",
            "[3,  1555] train_loss: 0.503 train_accuracy: 0.895 test_accuracy: 0.892\n",
            "[3,  1560] train_loss: 0.524 train_accuracy: 0.860 test_accuracy: 0.872\n",
            "[3,  1565] train_loss: 0.415 train_accuracy: 0.840 test_accuracy: 0.856\n",
            "[3,  1570] train_loss: 0.792 train_accuracy: 0.849 test_accuracy: 0.865\n",
            "[3,  1575] train_loss: 0.351 train_accuracy: 0.860 test_accuracy: 0.875\n",
            "[3,  1580] train_loss: 0.224 train_accuracy: 0.874 test_accuracy: 0.876\n",
            "[3,  1585] train_loss: 0.670 train_accuracy: 0.887 test_accuracy: 0.876\n",
            "[3,  1590] train_loss: 0.253 train_accuracy: 0.865 test_accuracy: 0.850\n",
            "[3,  1595] train_loss: 0.428 train_accuracy: 0.821 test_accuracy: 0.828\n",
            "[3,  1600] train_loss: 0.441 train_accuracy: 0.831 test_accuracy: 0.836\n",
            "[3,  1605] train_loss: 0.323 train_accuracy: 0.846 test_accuracy: 0.850\n",
            "[3,  1610] train_loss: 0.402 train_accuracy: 0.863 test_accuracy: 0.862\n",
            "[3,  1615] train_loss: 0.457 train_accuracy: 0.889 test_accuracy: 0.880\n",
            "[3,  1620] train_loss: 0.341 train_accuracy: 0.891 test_accuracy: 0.887\n",
            "[3,  1625] train_loss: 0.230 train_accuracy: 0.897 test_accuracy: 0.887\n",
            "[3,  1630] train_loss: 0.355 train_accuracy: 0.892 test_accuracy: 0.882\n",
            "[3,  1635] train_loss: 0.212 train_accuracy: 0.893 test_accuracy: 0.880\n",
            "[3,  1640] train_loss: 0.281 train_accuracy: 0.893 test_accuracy: 0.880\n",
            "[3,  1645] train_loss: 0.240 train_accuracy: 0.891 test_accuracy: 0.883\n",
            "[3,  1650] train_loss: 0.298 train_accuracy: 0.886 test_accuracy: 0.876\n",
            "[3,  1655] train_loss: 0.336 train_accuracy: 0.885 test_accuracy: 0.867\n",
            "[3,  1660] train_loss: 0.644 train_accuracy: 0.886 test_accuracy: 0.868\n",
            "[3,  1665] train_loss: 0.470 train_accuracy: 0.883 test_accuracy: 0.868\n",
            "[3,  1670] train_loss: 0.415 train_accuracy: 0.875 test_accuracy: 0.863\n",
            "[3,  1675] train_loss: 0.904 train_accuracy: 0.885 test_accuracy: 0.872\n",
            "[3,  1680] train_loss: 0.327 train_accuracy: 0.877 test_accuracy: 0.868\n",
            "[3,  1685] train_loss: 0.371 train_accuracy: 0.871 test_accuracy: 0.866\n",
            "[3,  1690] train_loss: 0.782 train_accuracy: 0.887 test_accuracy: 0.881\n",
            "[3,  1695] train_loss: 0.193 train_accuracy: 0.878 test_accuracy: 0.873\n",
            "[3,  1700] train_loss: 0.803 train_accuracy: 0.855 test_accuracy: 0.858\n",
            "[3,  1705] train_loss: 0.128 train_accuracy: 0.844 test_accuracy: 0.833\n",
            "[3,  1710] train_loss: 0.506 train_accuracy: 0.862 test_accuracy: 0.851\n",
            "[3,  1715] train_loss: 0.671 train_accuracy: 0.874 test_accuracy: 0.855\n",
            "[3,  1720] train_loss: 0.700 train_accuracy: 0.879 test_accuracy: 0.863\n",
            "[3,  1725] train_loss: 0.838 train_accuracy: 0.880 test_accuracy: 0.873\n",
            "[4,     5] train_loss: 0.437 train_accuracy: 0.883 test_accuracy: 0.883\n",
            "[4,    10] train_loss: 0.579 train_accuracy: 0.889 test_accuracy: 0.885\n",
            "[4,    15] train_loss: 0.392 train_accuracy: 0.884 test_accuracy: 0.879\n",
            "[4,    20] train_loss: 0.622 train_accuracy: 0.867 test_accuracy: 0.863\n",
            "[4,    25] train_loss: 0.617 train_accuracy: 0.873 test_accuracy: 0.859\n",
            "[4,    30] train_loss: 0.535 train_accuracy: 0.880 test_accuracy: 0.859\n",
            "[4,    35] train_loss: 0.423 train_accuracy: 0.884 test_accuracy: 0.860\n",
            "[4,    40] train_loss: 0.697 train_accuracy: 0.876 test_accuracy: 0.849\n",
            "[4,    45] train_loss: 1.198 train_accuracy: 0.890 test_accuracy: 0.869\n",
            "[4,    50] train_loss: 0.798 train_accuracy: 0.893 test_accuracy: 0.878\n",
            "[4,    55] train_loss: 0.527 train_accuracy: 0.866 test_accuracy: 0.863\n",
            "[4,    60] train_loss: 0.463 train_accuracy: 0.846 test_accuracy: 0.853\n",
            "[4,    65] train_loss: 0.899 train_accuracy: 0.855 test_accuracy: 0.859\n",
            "[4,    70] train_loss: 0.478 train_accuracy: 0.858 test_accuracy: 0.860\n",
            "[4,    75] train_loss: 0.456 train_accuracy: 0.870 test_accuracy: 0.860\n",
            "[4,    80] train_loss: 0.539 train_accuracy: 0.886 test_accuracy: 0.868\n",
            "[4,    85] train_loss: 0.294 train_accuracy: 0.880 test_accuracy: 0.862\n",
            "[4,    90] train_loss: 0.253 train_accuracy: 0.873 test_accuracy: 0.856\n",
            "[4,    95] train_loss: 0.171 train_accuracy: 0.873 test_accuracy: 0.857\n",
            "[4,   100] train_loss: 0.223 train_accuracy: 0.878 test_accuracy: 0.858\n",
            "[4,   105] train_loss: 0.391 train_accuracy: 0.894 test_accuracy: 0.869\n",
            "[4,   110] train_loss: 0.625 train_accuracy: 0.911 test_accuracy: 0.885\n",
            "[4,   115] train_loss: 0.548 train_accuracy: 0.902 test_accuracy: 0.873\n",
            "[4,   120] train_loss: 0.334 train_accuracy: 0.895 test_accuracy: 0.865\n",
            "[4,   125] train_loss: 0.401 train_accuracy: 0.891 test_accuracy: 0.860\n",
            "[4,   130] train_loss: 0.293 train_accuracy: 0.888 test_accuracy: 0.861\n",
            "[4,   135] train_loss: 0.428 train_accuracy: 0.888 test_accuracy: 0.863\n",
            "[4,   140] train_loss: 0.521 train_accuracy: 0.881 test_accuracy: 0.864\n",
            "[4,   145] train_loss: 0.709 train_accuracy: 0.870 test_accuracy: 0.858\n",
            "[4,   150] train_loss: 0.533 train_accuracy: 0.875 test_accuracy: 0.861\n",
            "[4,   155] train_loss: 0.655 train_accuracy: 0.882 test_accuracy: 0.862\n",
            "[4,   160] train_loss: 0.423 train_accuracy: 0.888 test_accuracy: 0.874\n",
            "[4,   165] train_loss: 0.432 train_accuracy: 0.881 test_accuracy: 0.876\n",
            "[4,   170] train_loss: 0.576 train_accuracy: 0.880 test_accuracy: 0.873\n",
            "[4,   175] train_loss: 0.402 train_accuracy: 0.890 test_accuracy: 0.881\n",
            "[4,   180] train_loss: 0.316 train_accuracy: 0.898 test_accuracy: 0.877\n",
            "[4,   185] train_loss: 0.496 train_accuracy: 0.897 test_accuracy: 0.870\n",
            "[4,   190] train_loss: 0.326 train_accuracy: 0.885 test_accuracy: 0.856\n",
            "[4,   195] train_loss: 0.220 train_accuracy: 0.875 test_accuracy: 0.853\n",
            "[4,   200] train_loss: 0.452 train_accuracy: 0.874 test_accuracy: 0.857\n",
            "[4,   205] train_loss: 0.318 train_accuracy: 0.870 test_accuracy: 0.861\n",
            "[4,   210] train_loss: 0.229 train_accuracy: 0.872 test_accuracy: 0.852\n",
            "[4,   215] train_loss: 0.583 train_accuracy: 0.864 test_accuracy: 0.844\n",
            "[4,   220] train_loss: 0.576 train_accuracy: 0.847 test_accuracy: 0.837\n",
            "[4,   225] train_loss: 0.643 train_accuracy: 0.867 test_accuracy: 0.866\n",
            "[4,   230] train_loss: 0.285 train_accuracy: 0.871 test_accuracy: 0.870\n",
            "[4,   235] train_loss: 0.281 train_accuracy: 0.875 test_accuracy: 0.868\n",
            "[4,   240] train_loss: 0.321 train_accuracy: 0.866 test_accuracy: 0.864\n",
            "[4,   245] train_loss: 0.424 train_accuracy: 0.871 test_accuracy: 0.867\n",
            "[4,   250] train_loss: 0.088 train_accuracy: 0.879 test_accuracy: 0.876\n",
            "[4,   255] train_loss: 0.445 train_accuracy: 0.886 test_accuracy: 0.877\n",
            "[4,   260] train_loss: 0.420 train_accuracy: 0.893 test_accuracy: 0.887\n",
            "[4,   265] train_loss: 0.260 train_accuracy: 0.900 test_accuracy: 0.891\n",
            "[4,   270] train_loss: 0.252 train_accuracy: 0.916 test_accuracy: 0.901\n",
            "[4,   275] train_loss: 0.212 train_accuracy: 0.904 test_accuracy: 0.891\n",
            "[4,   280] train_loss: 0.465 train_accuracy: 0.895 test_accuracy: 0.883\n",
            "[4,   285] train_loss: 0.238 train_accuracy: 0.879 test_accuracy: 0.874\n",
            "[4,   290] train_loss: 0.501 train_accuracy: 0.875 test_accuracy: 0.869\n",
            "[4,   295] train_loss: 0.354 train_accuracy: 0.879 test_accuracy: 0.861\n",
            "[4,   300] train_loss: 0.294 train_accuracy: 0.871 test_accuracy: 0.847\n",
            "[4,   305] train_loss: 0.342 train_accuracy: 0.873 test_accuracy: 0.851\n",
            "[4,   310] train_loss: 1.013 train_accuracy: 0.879 test_accuracy: 0.870\n",
            "[4,   315] train_loss: 0.209 train_accuracy: 0.869 test_accuracy: 0.857\n",
            "[4,   320] train_loss: 0.653 train_accuracy: 0.874 test_accuracy: 0.862\n",
            "[4,   325] train_loss: 0.252 train_accuracy: 0.886 test_accuracy: 0.867\n",
            "[4,   330] train_loss: 0.966 train_accuracy: 0.894 test_accuracy: 0.868\n",
            "[4,   335] train_loss: 0.207 train_accuracy: 0.908 test_accuracy: 0.886\n",
            "[4,   340] train_loss: 0.373 train_accuracy: 0.910 test_accuracy: 0.890\n",
            "[4,   345] train_loss: 0.127 train_accuracy: 0.908 test_accuracy: 0.894\n",
            "[4,   350] train_loss: 0.473 train_accuracy: 0.905 test_accuracy: 0.891\n",
            "[4,   355] train_loss: 0.423 train_accuracy: 0.903 test_accuracy: 0.885\n",
            "[4,   360] train_loss: 0.196 train_accuracy: 0.903 test_accuracy: 0.877\n",
            "[4,   365] train_loss: 0.068 train_accuracy: 0.897 test_accuracy: 0.868\n",
            "[4,   370] train_loss: 0.272 train_accuracy: 0.900 test_accuracy: 0.873\n",
            "[4,   375] train_loss: 0.566 train_accuracy: 0.907 test_accuracy: 0.880\n",
            "[4,   380] train_loss: 0.191 train_accuracy: 0.905 test_accuracy: 0.882\n",
            "[4,   385] train_loss: 0.828 train_accuracy: 0.909 test_accuracy: 0.885\n",
            "[4,   390] train_loss: 0.343 train_accuracy: 0.908 test_accuracy: 0.887\n",
            "[4,   395] train_loss: 0.219 train_accuracy: 0.899 test_accuracy: 0.884\n",
            "[4,   400] train_loss: 0.377 train_accuracy: 0.904 test_accuracy: 0.883\n",
            "[4,   405] train_loss: 0.195 train_accuracy: 0.895 test_accuracy: 0.876\n",
            "[4,   410] train_loss: 0.561 train_accuracy: 0.895 test_accuracy: 0.875\n",
            "[4,   415] train_loss: 0.209 train_accuracy: 0.898 test_accuracy: 0.879\n",
            "[4,   420] train_loss: 0.637 train_accuracy: 0.904 test_accuracy: 0.881\n",
            "[4,   425] train_loss: 0.589 train_accuracy: 0.895 test_accuracy: 0.860\n",
            "[4,   430] train_loss: 0.351 train_accuracy: 0.895 test_accuracy: 0.866\n",
            "[4,   435] train_loss: 0.414 train_accuracy: 0.884 test_accuracy: 0.874\n",
            "[4,   440] train_loss: 0.208 train_accuracy: 0.890 test_accuracy: 0.875\n",
            "[4,   445] train_loss: 0.301 train_accuracy: 0.898 test_accuracy: 0.878\n",
            "[4,   450] train_loss: 0.166 train_accuracy: 0.906 test_accuracy: 0.885\n",
            "[4,   455] train_loss: 0.317 train_accuracy: 0.907 test_accuracy: 0.890\n",
            "[4,   460] train_loss: 0.216 train_accuracy: 0.916 test_accuracy: 0.895\n",
            "[4,   465] train_loss: 0.220 train_accuracy: 0.915 test_accuracy: 0.892\n",
            "[4,   470] train_loss: 0.283 train_accuracy: 0.918 test_accuracy: 0.895\n",
            "[4,   475] train_loss: 0.203 train_accuracy: 0.918 test_accuracy: 0.895\n",
            "[4,   480] train_loss: 0.122 train_accuracy: 0.906 test_accuracy: 0.886\n",
            "[4,   485] train_loss: 0.407 train_accuracy: 0.911 test_accuracy: 0.883\n",
            "[4,   490] train_loss: 0.347 train_accuracy: 0.909 test_accuracy: 0.881\n",
            "[4,   495] train_loss: 0.323 train_accuracy: 0.908 test_accuracy: 0.882\n",
            "[4,   500] train_loss: 0.322 train_accuracy: 0.900 test_accuracy: 0.870\n",
            "[4,   505] train_loss: 0.296 train_accuracy: 0.902 test_accuracy: 0.875\n",
            "[4,   510] train_loss: 0.315 train_accuracy: 0.905 test_accuracy: 0.879\n",
            "[4,   515] train_loss: 0.404 train_accuracy: 0.908 test_accuracy: 0.886\n",
            "[4,   520] train_loss: 0.241 train_accuracy: 0.905 test_accuracy: 0.886\n",
            "[4,   525] train_loss: 0.116 train_accuracy: 0.903 test_accuracy: 0.889\n",
            "[4,   530] train_loss: 0.359 train_accuracy: 0.907 test_accuracy: 0.891\n",
            "[4,   535] train_loss: 0.148 train_accuracy: 0.904 test_accuracy: 0.889\n",
            "[4,   540] train_loss: 0.489 train_accuracy: 0.896 test_accuracy: 0.877\n",
            "[4,   545] train_loss: 0.614 train_accuracy: 0.881 test_accuracy: 0.861\n",
            "[4,   550] train_loss: 0.491 train_accuracy: 0.871 test_accuracy: 0.847\n",
            "[4,   555] train_loss: 0.256 train_accuracy: 0.863 test_accuracy: 0.847\n",
            "[4,   560] train_loss: 0.732 train_accuracy: 0.865 test_accuracy: 0.868\n",
            "[4,   565] train_loss: 0.819 train_accuracy: 0.857 test_accuracy: 0.867\n",
            "[4,   570] train_loss: 0.493 train_accuracy: 0.857 test_accuracy: 0.860\n",
            "[4,   575] train_loss: 0.559 train_accuracy: 0.863 test_accuracy: 0.863\n",
            "[4,   580] train_loss: 0.621 train_accuracy: 0.892 test_accuracy: 0.886\n",
            "[4,   585] train_loss: 0.220 train_accuracy: 0.870 test_accuracy: 0.858\n",
            "[4,   590] train_loss: 0.396 train_accuracy: 0.851 test_accuracy: 0.831\n",
            "[4,   595] train_loss: 0.418 train_accuracy: 0.844 test_accuracy: 0.812\n",
            "[4,   600] train_loss: 0.484 train_accuracy: 0.829 test_accuracy: 0.810\n",
            "[4,   605] train_loss: 1.585 train_accuracy: 0.829 test_accuracy: 0.835\n",
            "[4,   610] train_loss: 0.580 train_accuracy: 0.874 test_accuracy: 0.880\n",
            "[4,   615] train_loss: 0.577 train_accuracy: 0.903 test_accuracy: 0.900\n",
            "[4,   620] train_loss: 0.244 train_accuracy: 0.914 test_accuracy: 0.898\n",
            "[4,   625] train_loss: 0.355 train_accuracy: 0.909 test_accuracy: 0.889\n",
            "[4,   630] train_loss: 0.605 train_accuracy: 0.902 test_accuracy: 0.881\n",
            "[4,   635] train_loss: 0.330 train_accuracy: 0.898 test_accuracy: 0.880\n",
            "[4,   640] train_loss: 0.153 train_accuracy: 0.881 test_accuracy: 0.872\n",
            "[4,   645] train_loss: 0.398 train_accuracy: 0.872 test_accuracy: 0.872\n",
            "[4,   650] train_loss: 0.456 train_accuracy: 0.863 test_accuracy: 0.875\n",
            "[4,   655] train_loss: 0.232 train_accuracy: 0.868 test_accuracy: 0.886\n",
            "[4,   660] train_loss: 0.571 train_accuracy: 0.844 test_accuracy: 0.846\n",
            "[4,   665] train_loss: 0.525 train_accuracy: 0.838 test_accuracy: 0.833\n",
            "[4,   670] train_loss: 0.560 train_accuracy: 0.851 test_accuracy: 0.838\n",
            "[4,   675] train_loss: 0.642 train_accuracy: 0.874 test_accuracy: 0.866\n",
            "[4,   680] train_loss: 0.641 train_accuracy: 0.881 test_accuracy: 0.863\n",
            "[4,   685] train_loss: 0.294 train_accuracy: 0.895 test_accuracy: 0.871\n",
            "[4,   690] train_loss: 0.423 train_accuracy: 0.891 test_accuracy: 0.866\n",
            "[4,   695] train_loss: 0.573 train_accuracy: 0.884 test_accuracy: 0.862\n",
            "[4,   700] train_loss: 0.487 train_accuracy: 0.896 test_accuracy: 0.869\n",
            "[4,   705] train_loss: 0.924 train_accuracy: 0.900 test_accuracy: 0.871\n",
            "[4,   710] train_loss: 0.138 train_accuracy: 0.869 test_accuracy: 0.849\n",
            "[4,   715] train_loss: 0.552 train_accuracy: 0.857 test_accuracy: 0.842\n",
            "[4,   720] train_loss: 0.394 train_accuracy: 0.896 test_accuracy: 0.883\n",
            "[4,   725] train_loss: 0.101 train_accuracy: 0.900 test_accuracy: 0.891\n",
            "[4,   730] train_loss: 0.330 train_accuracy: 0.901 test_accuracy: 0.886\n",
            "[4,   735] train_loss: 0.250 train_accuracy: 0.903 test_accuracy: 0.883\n",
            "[4,   740] train_loss: 0.098 train_accuracy: 0.902 test_accuracy: 0.882\n",
            "[4,   745] train_loss: 0.051 train_accuracy: 0.904 test_accuracy: 0.880\n",
            "[4,   750] train_loss: 0.680 train_accuracy: 0.911 test_accuracy: 0.891\n",
            "[4,   755] train_loss: 0.383 train_accuracy: 0.915 test_accuracy: 0.892\n",
            "[4,   760] train_loss: 0.679 train_accuracy: 0.915 test_accuracy: 0.892\n",
            "[4,   765] train_loss: 0.314 train_accuracy: 0.914 test_accuracy: 0.890\n",
            "[4,   770] train_loss: 0.508 train_accuracy: 0.910 test_accuracy: 0.887\n",
            "[4,   775] train_loss: 0.424 train_accuracy: 0.903 test_accuracy: 0.881\n",
            "[4,   780] train_loss: 0.702 train_accuracy: 0.894 test_accuracy: 0.869\n",
            "[4,   785] train_loss: 0.883 train_accuracy: 0.893 test_accuracy: 0.871\n",
            "[4,   790] train_loss: 0.793 train_accuracy: 0.893 test_accuracy: 0.874\n",
            "[4,   795] train_loss: 0.282 train_accuracy: 0.898 test_accuracy: 0.880\n",
            "[4,   800] train_loss: 0.337 train_accuracy: 0.905 test_accuracy: 0.889\n",
            "[4,   805] train_loss: 0.325 train_accuracy: 0.908 test_accuracy: 0.894\n",
            "[4,   810] train_loss: 0.033 train_accuracy: 0.908 test_accuracy: 0.892\n",
            "[4,   815] train_loss: 0.354 train_accuracy: 0.908 test_accuracy: 0.889\n",
            "[4,   820] train_loss: 0.482 train_accuracy: 0.913 test_accuracy: 0.891\n",
            "[4,   825] train_loss: 0.410 train_accuracy: 0.910 test_accuracy: 0.898\n",
            "[4,   830] train_loss: 0.046 train_accuracy: 0.891 test_accuracy: 0.890\n",
            "[4,   835] train_loss: 0.602 train_accuracy: 0.883 test_accuracy: 0.879\n",
            "[4,   840] train_loss: 0.479 train_accuracy: 0.876 test_accuracy: 0.854\n",
            "[4,   845] train_loss: 0.424 train_accuracy: 0.895 test_accuracy: 0.865\n",
            "[4,   850] train_loss: 0.601 train_accuracy: 0.902 test_accuracy: 0.880\n",
            "[4,   855] train_loss: 0.356 train_accuracy: 0.887 test_accuracy: 0.883\n",
            "[4,   860] train_loss: 1.044 train_accuracy: 0.886 test_accuracy: 0.879\n",
            "[4,   865] train_loss: 0.709 train_accuracy: 0.894 test_accuracy: 0.877\n",
            "[4,   870] train_loss: 0.430 train_accuracy: 0.904 test_accuracy: 0.883\n",
            "[4,   875] train_loss: 0.224 train_accuracy: 0.904 test_accuracy: 0.882\n",
            "[4,   880] train_loss: 0.494 train_accuracy: 0.902 test_accuracy: 0.881\n",
            "[4,   885] train_loss: 0.417 train_accuracy: 0.897 test_accuracy: 0.874\n",
            "[4,   890] train_loss: 0.113 train_accuracy: 0.903 test_accuracy: 0.877\n",
            "[4,   895] train_loss: 0.029 train_accuracy: 0.904 test_accuracy: 0.878\n",
            "[4,   900] train_loss: 0.049 train_accuracy: 0.904 test_accuracy: 0.877\n",
            "[4,   905] train_loss: 0.395 train_accuracy: 0.914 test_accuracy: 0.886\n",
            "[4,   910] train_loss: 0.104 train_accuracy: 0.917 test_accuracy: 0.887\n",
            "[4,   915] train_loss: 0.676 train_accuracy: 0.914 test_accuracy: 0.885\n",
            "[4,   920] train_loss: 0.158 train_accuracy: 0.893 test_accuracy: 0.868\n",
            "[4,   925] train_loss: 0.364 train_accuracy: 0.869 test_accuracy: 0.866\n",
            "[4,   930] train_loss: 0.883 train_accuracy: 0.859 test_accuracy: 0.857\n",
            "[4,   935] train_loss: 0.328 train_accuracy: 0.858 test_accuracy: 0.850\n",
            "[4,   940] train_loss: 0.441 train_accuracy: 0.875 test_accuracy: 0.853\n",
            "[4,   945] train_loss: 0.257 train_accuracy: 0.876 test_accuracy: 0.852\n",
            "[4,   950] train_loss: 0.948 train_accuracy: 0.892 test_accuracy: 0.872\n",
            "[4,   955] train_loss: 0.622 train_accuracy: 0.910 test_accuracy: 0.890\n",
            "[4,   960] train_loss: 0.291 train_accuracy: 0.905 test_accuracy: 0.886\n",
            "[4,   965] train_loss: 0.364 train_accuracy: 0.900 test_accuracy: 0.878\n",
            "[4,   970] train_loss: 0.511 train_accuracy: 0.896 test_accuracy: 0.873\n",
            "[4,   975] train_loss: 0.435 train_accuracy: 0.893 test_accuracy: 0.873\n",
            "[4,   980] train_loss: 0.555 train_accuracy: 0.895 test_accuracy: 0.874\n",
            "[4,   985] train_loss: 0.038 train_accuracy: 0.890 test_accuracy: 0.871\n",
            "[4,   990] train_loss: 1.281 train_accuracy: 0.894 test_accuracy: 0.872\n",
            "[4,   995] train_loss: 0.394 train_accuracy: 0.869 test_accuracy: 0.846\n",
            "[4,  1000] train_loss: 0.904 train_accuracy: 0.867 test_accuracy: 0.845\n",
            "[4,  1005] train_loss: 0.794 train_accuracy: 0.870 test_accuracy: 0.864\n",
            "[4,  1010] train_loss: 0.357 train_accuracy: 0.859 test_accuracy: 0.858\n",
            "[4,  1015] train_loss: 0.671 train_accuracy: 0.858 test_accuracy: 0.856\n",
            "[4,  1020] train_loss: 0.502 train_accuracy: 0.882 test_accuracy: 0.866\n",
            "[4,  1025] train_loss: 0.212 train_accuracy: 0.890 test_accuracy: 0.871\n",
            "[4,  1030] train_loss: 0.237 train_accuracy: 0.888 test_accuracy: 0.864\n",
            "[4,  1035] train_loss: 0.392 train_accuracy: 0.885 test_accuracy: 0.859\n",
            "[4,  1040] train_loss: 0.548 train_accuracy: 0.893 test_accuracy: 0.871\n",
            "[4,  1045] train_loss: 0.603 train_accuracy: 0.904 test_accuracy: 0.878\n",
            "[4,  1050] train_loss: 0.298 train_accuracy: 0.896 test_accuracy: 0.861\n",
            "[4,  1055] train_loss: 0.655 train_accuracy: 0.902 test_accuracy: 0.871\n",
            "[4,  1060] train_loss: 0.247 train_accuracy: 0.892 test_accuracy: 0.878\n",
            "[4,  1065] train_loss: 0.297 train_accuracy: 0.894 test_accuracy: 0.872\n",
            "[4,  1070] train_loss: 0.111 train_accuracy: 0.886 test_accuracy: 0.861\n",
            "[4,  1075] train_loss: 0.464 train_accuracy: 0.883 test_accuracy: 0.857\n",
            "[4,  1080] train_loss: 0.222 train_accuracy: 0.891 test_accuracy: 0.863\n",
            "[4,  1085] train_loss: 0.410 train_accuracy: 0.899 test_accuracy: 0.870\n",
            "[4,  1090] train_loss: 0.211 train_accuracy: 0.901 test_accuracy: 0.871\n",
            "[4,  1095] train_loss: 0.374 train_accuracy: 0.908 test_accuracy: 0.884\n",
            "[4,  1100] train_loss: 0.517 train_accuracy: 0.908 test_accuracy: 0.895\n",
            "[4,  1105] train_loss: 0.476 train_accuracy: 0.912 test_accuracy: 0.901\n",
            "[4,  1110] train_loss: 0.274 train_accuracy: 0.913 test_accuracy: 0.897\n",
            "[4,  1115] train_loss: 0.173 train_accuracy: 0.910 test_accuracy: 0.893\n",
            "[4,  1120] train_loss: 0.243 train_accuracy: 0.911 test_accuracy: 0.891\n",
            "[4,  1125] train_loss: 0.515 train_accuracy: 0.914 test_accuracy: 0.890\n",
            "[4,  1130] train_loss: 0.194 train_accuracy: 0.903 test_accuracy: 0.872\n",
            "[4,  1135] train_loss: 0.261 train_accuracy: 0.879 test_accuracy: 0.849\n",
            "[4,  1140] train_loss: 0.287 train_accuracy: 0.887 test_accuracy: 0.856\n",
            "[4,  1145] train_loss: 0.461 train_accuracy: 0.903 test_accuracy: 0.873\n",
            "[4,  1150] train_loss: 0.503 train_accuracy: 0.914 test_accuracy: 0.889\n",
            "[4,  1155] train_loss: 0.132 train_accuracy: 0.909 test_accuracy: 0.890\n",
            "[4,  1160] train_loss: 0.341 train_accuracy: 0.908 test_accuracy: 0.887\n",
            "[4,  1165] train_loss: 0.193 train_accuracy: 0.913 test_accuracy: 0.892\n",
            "[4,  1170] train_loss: 0.139 train_accuracy: 0.912 test_accuracy: 0.895\n",
            "[4,  1175] train_loss: 0.283 train_accuracy: 0.909 test_accuracy: 0.892\n",
            "[4,  1180] train_loss: 0.081 train_accuracy: 0.896 test_accuracy: 0.875\n",
            "[4,  1185] train_loss: 0.099 train_accuracy: 0.889 test_accuracy: 0.863\n",
            "[4,  1190] train_loss: 0.347 train_accuracy: 0.885 test_accuracy: 0.862\n",
            "[4,  1195] train_loss: 0.014 train_accuracy: 0.884 test_accuracy: 0.860\n",
            "[4,  1200] train_loss: 0.583 train_accuracy: 0.895 test_accuracy: 0.870\n",
            "[4,  1205] train_loss: 0.356 train_accuracy: 0.913 test_accuracy: 0.883\n",
            "[4,  1210] train_loss: 0.056 train_accuracy: 0.917 test_accuracy: 0.892\n",
            "[4,  1215] train_loss: 0.690 train_accuracy: 0.917 test_accuracy: 0.896\n",
            "[4,  1220] train_loss: 0.431 train_accuracy: 0.911 test_accuracy: 0.891\n",
            "[4,  1225] train_loss: 0.170 train_accuracy: 0.899 test_accuracy: 0.883\n",
            "[4,  1230] train_loss: 0.510 train_accuracy: 0.900 test_accuracy: 0.883\n",
            "[4,  1235] train_loss: 0.735 train_accuracy: 0.913 test_accuracy: 0.899\n",
            "[4,  1240] train_loss: 0.666 train_accuracy: 0.904 test_accuracy: 0.898\n",
            "[4,  1245] train_loss: 0.570 train_accuracy: 0.887 test_accuracy: 0.887\n",
            "[4,  1250] train_loss: 0.477 train_accuracy: 0.879 test_accuracy: 0.875\n",
            "[4,  1255] train_loss: 0.572 train_accuracy: 0.877 test_accuracy: 0.866\n",
            "[4,  1260] train_loss: 0.132 train_accuracy: 0.885 test_accuracy: 0.866\n",
            "[4,  1265] train_loss: 0.488 train_accuracy: 0.889 test_accuracy: 0.871\n",
            "[4,  1270] train_loss: 0.336 train_accuracy: 0.898 test_accuracy: 0.880\n",
            "[4,  1275] train_loss: 0.287 train_accuracy: 0.903 test_accuracy: 0.885\n",
            "[4,  1280] train_loss: 0.392 train_accuracy: 0.915 test_accuracy: 0.896\n",
            "[4,  1285] train_loss: 0.240 train_accuracy: 0.923 test_accuracy: 0.901\n",
            "[4,  1290] train_loss: 0.372 train_accuracy: 0.916 test_accuracy: 0.899\n",
            "[4,  1295] train_loss: 0.369 train_accuracy: 0.917 test_accuracy: 0.897\n",
            "[4,  1300] train_loss: 0.164 train_accuracy: 0.924 test_accuracy: 0.901\n",
            "[4,  1305] train_loss: 0.244 train_accuracy: 0.921 test_accuracy: 0.894\n",
            "[4,  1310] train_loss: 0.044 train_accuracy: 0.914 test_accuracy: 0.883\n",
            "[4,  1315] train_loss: 0.100 train_accuracy: 0.907 test_accuracy: 0.875\n",
            "[4,  1320] train_loss: 0.502 train_accuracy: 0.910 test_accuracy: 0.884\n",
            "[4,  1325] train_loss: 0.438 train_accuracy: 0.901 test_accuracy: 0.887\n",
            "[4,  1330] train_loss: 0.306 train_accuracy: 0.887 test_accuracy: 0.884\n",
            "[4,  1335] train_loss: 0.260 train_accuracy: 0.881 test_accuracy: 0.888\n",
            "[4,  1340] train_loss: 0.638 train_accuracy: 0.878 test_accuracy: 0.892\n",
            "[4,  1345] train_loss: 0.354 train_accuracy: 0.890 test_accuracy: 0.901\n",
            "[4,  1350] train_loss: 0.282 train_accuracy: 0.898 test_accuracy: 0.902\n",
            "[4,  1355] train_loss: 0.417 train_accuracy: 0.900 test_accuracy: 0.894\n",
            "[4,  1360] train_loss: 0.266 train_accuracy: 0.910 test_accuracy: 0.887\n",
            "[4,  1365] train_loss: 0.099 train_accuracy: 0.913 test_accuracy: 0.883\n",
            "[4,  1370] train_loss: 0.172 train_accuracy: 0.908 test_accuracy: 0.881\n",
            "[4,  1375] train_loss: 0.061 train_accuracy: 0.903 test_accuracy: 0.878\n",
            "[4,  1380] train_loss: 0.253 train_accuracy: 0.897 test_accuracy: 0.878\n",
            "[4,  1385] train_loss: 0.397 train_accuracy: 0.901 test_accuracy: 0.892\n",
            "[4,  1390] train_loss: 0.104 train_accuracy: 0.898 test_accuracy: 0.891\n",
            "[4,  1395] train_loss: 0.141 train_accuracy: 0.899 test_accuracy: 0.895\n",
            "[4,  1400] train_loss: 0.322 train_accuracy: 0.898 test_accuracy: 0.897\n",
            "[4,  1405] train_loss: 0.555 train_accuracy: 0.897 test_accuracy: 0.891\n",
            "[4,  1410] train_loss: 0.394 train_accuracy: 0.905 test_accuracy: 0.888\n",
            "[4,  1415] train_loss: 0.128 train_accuracy: 0.902 test_accuracy: 0.889\n",
            "[4,  1420] train_loss: 0.289 train_accuracy: 0.877 test_accuracy: 0.887\n",
            "[4,  1425] train_loss: 0.624 train_accuracy: 0.862 test_accuracy: 0.881\n",
            "[4,  1430] train_loss: 0.275 train_accuracy: 0.863 test_accuracy: 0.867\n",
            "[4,  1435] train_loss: 0.450 train_accuracy: 0.887 test_accuracy: 0.878\n",
            "[4,  1440] train_loss: 1.029 train_accuracy: 0.902 test_accuracy: 0.879\n",
            "[4,  1445] train_loss: 0.339 train_accuracy: 0.900 test_accuracy: 0.870\n",
            "[4,  1450] train_loss: 0.304 train_accuracy: 0.893 test_accuracy: 0.865\n",
            "[4,  1455] train_loss: 0.374 train_accuracy: 0.879 test_accuracy: 0.859\n",
            "[4,  1460] train_loss: 0.368 train_accuracy: 0.877 test_accuracy: 0.856\n",
            "[4,  1465] train_loss: 0.349 train_accuracy: 0.889 test_accuracy: 0.859\n",
            "[4,  1470] train_loss: 0.375 train_accuracy: 0.891 test_accuracy: 0.860\n",
            "[4,  1475] train_loss: 0.257 train_accuracy: 0.898 test_accuracy: 0.869\n",
            "[4,  1480] train_loss: 0.360 train_accuracy: 0.911 test_accuracy: 0.880\n",
            "[4,  1485] train_loss: 0.187 train_accuracy: 0.918 test_accuracy: 0.888\n",
            "[4,  1490] train_loss: 0.651 train_accuracy: 0.916 test_accuracy: 0.888\n",
            "[4,  1495] train_loss: 0.326 train_accuracy: 0.912 test_accuracy: 0.887\n",
            "[4,  1500] train_loss: 0.771 train_accuracy: 0.916 test_accuracy: 0.893\n",
            "[4,  1505] train_loss: 0.066 train_accuracy: 0.902 test_accuracy: 0.886\n",
            "[4,  1510] train_loss: 0.478 train_accuracy: 0.891 test_accuracy: 0.878\n",
            "[4,  1515] train_loss: 0.525 train_accuracy: 0.892 test_accuracy: 0.877\n",
            "[4,  1520] train_loss: 0.260 train_accuracy: 0.893 test_accuracy: 0.874\n",
            "[4,  1525] train_loss: 0.112 train_accuracy: 0.885 test_accuracy: 0.862\n",
            "[4,  1530] train_loss: 0.578 train_accuracy: 0.908 test_accuracy: 0.885\n",
            "[4,  1535] train_loss: 0.040 train_accuracy: 0.907 test_accuracy: 0.877\n",
            "[4,  1540] train_loss: 0.066 train_accuracy: 0.902 test_accuracy: 0.873\n",
            "[4,  1545] train_loss: 0.324 train_accuracy: 0.896 test_accuracy: 0.870\n",
            "[4,  1550] train_loss: 0.574 train_accuracy: 0.893 test_accuracy: 0.869\n",
            "[4,  1555] train_loss: 0.571 train_accuracy: 0.904 test_accuracy: 0.879\n",
            "[4,  1560] train_loss: 0.665 train_accuracy: 0.914 test_accuracy: 0.890\n",
            "[4,  1565] train_loss: 0.172 train_accuracy: 0.914 test_accuracy: 0.891\n",
            "[4,  1570] train_loss: 0.598 train_accuracy: 0.923 test_accuracy: 0.901\n",
            "[4,  1575] train_loss: 0.420 train_accuracy: 0.925 test_accuracy: 0.904\n",
            "[4,  1580] train_loss: 0.235 train_accuracy: 0.922 test_accuracy: 0.900\n",
            "[4,  1585] train_loss: 0.093 train_accuracy: 0.916 test_accuracy: 0.895\n",
            "[4,  1590] train_loss: 0.309 train_accuracy: 0.919 test_accuracy: 0.895\n",
            "[4,  1595] train_loss: 0.210 train_accuracy: 0.919 test_accuracy: 0.899\n",
            "[4,  1600] train_loss: 0.412 train_accuracy: 0.916 test_accuracy: 0.902\n",
            "[4,  1605] train_loss: 0.261 train_accuracy: 0.920 test_accuracy: 0.901\n",
            "[4,  1610] train_loss: 0.217 train_accuracy: 0.914 test_accuracy: 0.889\n",
            "[4,  1615] train_loss: 0.304 train_accuracy: 0.915 test_accuracy: 0.892\n",
            "[4,  1620] train_loss: 0.638 train_accuracy: 0.911 test_accuracy: 0.896\n",
            "[4,  1625] train_loss: 0.161 train_accuracy: 0.887 test_accuracy: 0.884\n",
            "[4,  1630] train_loss: 0.289 train_accuracy: 0.887 test_accuracy: 0.880\n",
            "[4,  1635] train_loss: 0.281 train_accuracy: 0.890 test_accuracy: 0.877\n",
            "[4,  1640] train_loss: 0.505 train_accuracy: 0.901 test_accuracy: 0.882\n",
            "[4,  1645] train_loss: 0.514 train_accuracy: 0.904 test_accuracy: 0.883\n",
            "[4,  1650] train_loss: 0.285 train_accuracy: 0.908 test_accuracy: 0.884\n",
            "[4,  1655] train_loss: 0.225 train_accuracy: 0.917 test_accuracy: 0.895\n",
            "[4,  1660] train_loss: 0.951 train_accuracy: 0.914 test_accuracy: 0.898\n",
            "[4,  1665] train_loss: 0.403 train_accuracy: 0.911 test_accuracy: 0.898\n",
            "[4,  1670] train_loss: 0.497 train_accuracy: 0.906 test_accuracy: 0.901\n",
            "[4,  1675] train_loss: 0.293 train_accuracy: 0.900 test_accuracy: 0.897\n",
            "[4,  1680] train_loss: 0.435 train_accuracy: 0.903 test_accuracy: 0.884\n",
            "[4,  1685] train_loss: 0.306 train_accuracy: 0.899 test_accuracy: 0.873\n",
            "[4,  1690] train_loss: 0.243 train_accuracy: 0.902 test_accuracy: 0.873\n",
            "[4,  1695] train_loss: 0.468 train_accuracy: 0.910 test_accuracy: 0.884\n",
            "[4,  1700] train_loss: 0.170 train_accuracy: 0.913 test_accuracy: 0.891\n",
            "[4,  1705] train_loss: 0.573 train_accuracy: 0.916 test_accuracy: 0.896\n",
            "[4,  1710] train_loss: 0.598 train_accuracy: 0.916 test_accuracy: 0.894\n",
            "[4,  1715] train_loss: 0.179 train_accuracy: 0.912 test_accuracy: 0.885\n",
            "[4,  1720] train_loss: 0.353 train_accuracy: 0.914 test_accuracy: 0.886\n",
            "[4,  1725] train_loss: 0.525 train_accuracy: 0.908 test_accuracy: 0.889\n",
            "[5,     5] train_loss: 0.334 train_accuracy: 0.900 test_accuracy: 0.882\n",
            "[5,    10] train_loss: 0.248 train_accuracy: 0.904 test_accuracy: 0.888\n",
            "[5,    15] train_loss: 0.311 train_accuracy: 0.908 test_accuracy: 0.894\n",
            "[5,    20] train_loss: 0.432 train_accuracy: 0.910 test_accuracy: 0.900\n",
            "[5,    25] train_loss: 0.290 train_accuracy: 0.900 test_accuracy: 0.893\n",
            "[5,    30] train_loss: 0.288 train_accuracy: 0.910 test_accuracy: 0.883\n",
            "[5,    35] train_loss: 0.289 train_accuracy: 0.907 test_accuracy: 0.879\n",
            "[5,    40] train_loss: 0.247 train_accuracy: 0.899 test_accuracy: 0.872\n",
            "[5,    45] train_loss: 0.665 train_accuracy: 0.899 test_accuracy: 0.879\n",
            "[5,    50] train_loss: 0.518 train_accuracy: 0.914 test_accuracy: 0.890\n",
            "[5,    55] train_loss: 0.119 train_accuracy: 0.918 test_accuracy: 0.892\n",
            "[5,    60] train_loss: 0.131 train_accuracy: 0.919 test_accuracy: 0.894\n",
            "[5,    65] train_loss: 1.089 train_accuracy: 0.917 test_accuracy: 0.895\n",
            "[5,    70] train_loss: 0.462 train_accuracy: 0.903 test_accuracy: 0.890\n",
            "[5,    75] train_loss: 0.110 train_accuracy: 0.884 test_accuracy: 0.877\n",
            "[5,    80] train_loss: 0.351 train_accuracy: 0.868 test_accuracy: 0.868\n",
            "[5,    85] train_loss: 0.451 train_accuracy: 0.866 test_accuracy: 0.860\n",
            "[5,    90] train_loss: 0.313 train_accuracy: 0.871 test_accuracy: 0.859\n",
            "[5,    95] train_loss: 0.289 train_accuracy: 0.878 test_accuracy: 0.860\n",
            "[5,   100] train_loss: 0.322 train_accuracy: 0.890 test_accuracy: 0.877\n",
            "[5,   105] train_loss: 0.146 train_accuracy: 0.902 test_accuracy: 0.886\n",
            "[5,   110] train_loss: 0.094 train_accuracy: 0.907 test_accuracy: 0.893\n",
            "[5,   115] train_loss: 0.272 train_accuracy: 0.914 test_accuracy: 0.902\n",
            "[5,   120] train_loss: 0.113 train_accuracy: 0.914 test_accuracy: 0.907\n",
            "[5,   125] train_loss: 0.253 train_accuracy: 0.917 test_accuracy: 0.911\n",
            "[5,   130] train_loss: 0.208 train_accuracy: 0.920 test_accuracy: 0.911\n",
            "[5,   135] train_loss: 0.657 train_accuracy: 0.915 test_accuracy: 0.905\n",
            "[5,   140] train_loss: 0.765 train_accuracy: 0.909 test_accuracy: 0.900\n",
            "[5,   145] train_loss: 0.535 train_accuracy: 0.913 test_accuracy: 0.906\n",
            "[5,   150] train_loss: 0.247 train_accuracy: 0.912 test_accuracy: 0.902\n",
            "[5,   155] train_loss: 0.221 train_accuracy: 0.912 test_accuracy: 0.897\n",
            "[5,   160] train_loss: 0.071 train_accuracy: 0.911 test_accuracy: 0.894\n",
            "[5,   165] train_loss: 0.946 train_accuracy: 0.914 test_accuracy: 0.894\n",
            "[5,   170] train_loss: 0.073 train_accuracy: 0.908 test_accuracy: 0.888\n",
            "[5,   175] train_loss: 0.291 train_accuracy: 0.902 test_accuracy: 0.888\n",
            "[5,   180] train_loss: 0.239 train_accuracy: 0.899 test_accuracy: 0.886\n",
            "[5,   185] train_loss: 1.016 train_accuracy: 0.899 test_accuracy: 0.892\n",
            "[5,   190] train_loss: 0.167 train_accuracy: 0.897 test_accuracy: 0.897\n",
            "[5,   195] train_loss: 0.151 train_accuracy: 0.875 test_accuracy: 0.859\n",
            "[5,   200] train_loss: 0.365 train_accuracy: 0.891 test_accuracy: 0.875\n",
            "[5,   205] train_loss: 0.182 train_accuracy: 0.915 test_accuracy: 0.894\n",
            "[5,   210] train_loss: 0.210 train_accuracy: 0.909 test_accuracy: 0.888\n",
            "[5,   215] train_loss: 0.448 train_accuracy: 0.905 test_accuracy: 0.881\n",
            "[5,   220] train_loss: 0.287 train_accuracy: 0.897 test_accuracy: 0.876\n",
            "[5,   225] train_loss: 0.234 train_accuracy: 0.882 test_accuracy: 0.865\n",
            "[5,   230] train_loss: 0.240 train_accuracy: 0.895 test_accuracy: 0.873\n",
            "[5,   235] train_loss: 0.175 train_accuracy: 0.897 test_accuracy: 0.877\n",
            "[5,   240] train_loss: 0.308 train_accuracy: 0.906 test_accuracy: 0.887\n",
            "[5,   245] train_loss: 0.230 train_accuracy: 0.904 test_accuracy: 0.888\n",
            "[5,   250] train_loss: 0.035 train_accuracy: 0.886 test_accuracy: 0.874\n",
            "[5,   255] train_loss: 0.457 train_accuracy: 0.908 test_accuracy: 0.886\n",
            "[5,   260] train_loss: 0.766 train_accuracy: 0.908 test_accuracy: 0.883\n",
            "[5,   265] train_loss: 0.314 train_accuracy: 0.911 test_accuracy: 0.886\n",
            "[5,   270] train_loss: 0.099 train_accuracy: 0.913 test_accuracy: 0.891\n",
            "[5,   275] train_loss: 0.371 train_accuracy: 0.913 test_accuracy: 0.892\n",
            "[5,   280] train_loss: 0.344 train_accuracy: 0.914 test_accuracy: 0.896\n",
            "[5,   285] train_loss: 0.378 train_accuracy: 0.911 test_accuracy: 0.895\n",
            "[5,   290] train_loss: 0.037 train_accuracy: 0.899 test_accuracy: 0.885\n",
            "[5,   295] train_loss: 0.391 train_accuracy: 0.902 test_accuracy: 0.891\n",
            "[5,   300] train_loss: 0.424 train_accuracy: 0.916 test_accuracy: 0.900\n",
            "[5,   305] train_loss: 0.982 train_accuracy: 0.922 test_accuracy: 0.906\n",
            "[5,   310] train_loss: 0.304 train_accuracy: 0.912 test_accuracy: 0.894\n",
            "[5,   315] train_loss: 0.546 train_accuracy: 0.903 test_accuracy: 0.884\n",
            "[5,   320] train_loss: 0.391 train_accuracy: 0.895 test_accuracy: 0.873\n",
            "[5,   325] train_loss: 0.481 train_accuracy: 0.900 test_accuracy: 0.884\n",
            "[5,   330] train_loss: 0.027 train_accuracy: 0.894 test_accuracy: 0.880\n",
            "[5,   335] train_loss: 0.159 train_accuracy: 0.894 test_accuracy: 0.876\n",
            "[5,   340] train_loss: 0.456 train_accuracy: 0.889 test_accuracy: 0.875\n",
            "[5,   345] train_loss: 0.302 train_accuracy: 0.891 test_accuracy: 0.876\n",
            "[5,   350] train_loss: 0.072 train_accuracy: 0.890 test_accuracy: 0.877\n",
            "[5,   355] train_loss: 0.356 train_accuracy: 0.901 test_accuracy: 0.884\n",
            "[5,   360] train_loss: 0.357 train_accuracy: 0.904 test_accuracy: 0.881\n",
            "[5,   365] train_loss: 0.401 train_accuracy: 0.894 test_accuracy: 0.872\n",
            "[5,   370] train_loss: 0.604 train_accuracy: 0.913 test_accuracy: 0.885\n",
            "[5,   375] train_loss: 0.289 train_accuracy: 0.903 test_accuracy: 0.868\n",
            "[5,   380] train_loss: 0.333 train_accuracy: 0.895 test_accuracy: 0.857\n",
            "[5,   385] train_loss: 0.532 train_accuracy: 0.889 test_accuracy: 0.849\n",
            "[5,   390] train_loss: 0.346 train_accuracy: 0.908 test_accuracy: 0.883\n",
            "[5,   395] train_loss: 0.690 train_accuracy: 0.891 test_accuracy: 0.896\n",
            "[5,   400] train_loss: 0.441 train_accuracy: 0.875 test_accuracy: 0.896\n",
            "[5,   405] train_loss: 0.260 train_accuracy: 0.880 test_accuracy: 0.903\n",
            "[5,   410] train_loss: 0.435 train_accuracy: 0.902 test_accuracy: 0.913\n",
            "[5,   415] train_loss: 0.446 train_accuracy: 0.915 test_accuracy: 0.916\n",
            "[5,   420] train_loss: 0.242 train_accuracy: 0.920 test_accuracy: 0.914\n",
            "[5,   425] train_loss: 0.394 train_accuracy: 0.919 test_accuracy: 0.909\n",
            "[5,   430] train_loss: 0.676 train_accuracy: 0.921 test_accuracy: 0.910\n",
            "[5,   435] train_loss: 0.162 train_accuracy: 0.913 test_accuracy: 0.900\n",
            "[5,   440] train_loss: 0.496 train_accuracy: 0.909 test_accuracy: 0.895\n",
            "[5,   445] train_loss: 0.447 train_accuracy: 0.909 test_accuracy: 0.901\n",
            "[5,   450] train_loss: 0.135 train_accuracy: 0.906 test_accuracy: 0.899\n",
            "[5,   455] train_loss: 0.647 train_accuracy: 0.906 test_accuracy: 0.895\n",
            "[5,   460] train_loss: 0.170 train_accuracy: 0.879 test_accuracy: 0.878\n",
            "[5,   465] train_loss: 0.316 train_accuracy: 0.860 test_accuracy: 0.870\n",
            "[5,   470] train_loss: 0.165 train_accuracy: 0.861 test_accuracy: 0.870\n",
            "[5,   475] train_loss: 0.668 train_accuracy: 0.864 test_accuracy: 0.870\n",
            "[5,   480] train_loss: 0.650 train_accuracy: 0.864 test_accuracy: 0.865\n",
            "[5,   485] train_loss: 0.468 train_accuracy: 0.891 test_accuracy: 0.864\n",
            "[5,   490] train_loss: 0.249 train_accuracy: 0.886 test_accuracy: 0.854\n",
            "[5,   495] train_loss: 0.326 train_accuracy: 0.887 test_accuracy: 0.857\n",
            "[5,   500] train_loss: 0.400 train_accuracy: 0.900 test_accuracy: 0.867\n",
            "[5,   505] train_loss: 0.466 train_accuracy: 0.903 test_accuracy: 0.870\n",
            "[5,   510] train_loss: 0.332 train_accuracy: 0.893 test_accuracy: 0.858\n",
            "[5,   515] train_loss: 0.516 train_accuracy: 0.891 test_accuracy: 0.854\n",
            "[5,   520] train_loss: 0.462 train_accuracy: 0.890 test_accuracy: 0.854\n",
            "[5,   525] train_loss: 0.758 train_accuracy: 0.906 test_accuracy: 0.876\n",
            "[5,   530] train_loss: 0.318 train_accuracy: 0.909 test_accuracy: 0.884\n",
            "[5,   535] train_loss: 0.493 train_accuracy: 0.907 test_accuracy: 0.884\n",
            "[5,   540] train_loss: 0.636 train_accuracy: 0.910 test_accuracy: 0.888\n",
            "[5,   545] train_loss: 0.493 train_accuracy: 0.916 test_accuracy: 0.900\n",
            "[5,   550] train_loss: 0.450 train_accuracy: 0.917 test_accuracy: 0.904\n",
            "[5,   555] train_loss: 0.115 train_accuracy: 0.905 test_accuracy: 0.894\n",
            "[5,   560] train_loss: 0.460 train_accuracy: 0.897 test_accuracy: 0.888\n",
            "[5,   565] train_loss: 0.479 train_accuracy: 0.906 test_accuracy: 0.889\n",
            "[5,   570] train_loss: 0.363 train_accuracy: 0.901 test_accuracy: 0.886\n",
            "[5,   575] train_loss: 0.273 train_accuracy: 0.904 test_accuracy: 0.886\n",
            "[5,   580] train_loss: 0.442 train_accuracy: 0.911 test_accuracy: 0.888\n",
            "[5,   585] train_loss: 0.541 train_accuracy: 0.910 test_accuracy: 0.886\n",
            "[5,   590] train_loss: 0.513 train_accuracy: 0.910 test_accuracy: 0.888\n",
            "[5,   595] train_loss: 0.694 train_accuracy: 0.911 test_accuracy: 0.896\n",
            "[5,   600] train_loss: 0.057 train_accuracy: 0.892 test_accuracy: 0.892\n",
            "[5,   605] train_loss: 0.571 train_accuracy: 0.894 test_accuracy: 0.898\n",
            "[5,   610] train_loss: 0.342 train_accuracy: 0.923 test_accuracy: 0.906\n",
            "[5,   615] train_loss: 0.206 train_accuracy: 0.916 test_accuracy: 0.893\n",
            "[5,   620] train_loss: 0.529 train_accuracy: 0.914 test_accuracy: 0.891\n",
            "[5,   625] train_loss: 0.280 train_accuracy: 0.918 test_accuracy: 0.894\n",
            "[5,   630] train_loss: 0.150 train_accuracy: 0.917 test_accuracy: 0.896\n",
            "[5,   635] train_loss: 0.527 train_accuracy: 0.916 test_accuracy: 0.901\n",
            "[5,   640] train_loss: 0.314 train_accuracy: 0.923 test_accuracy: 0.900\n",
            "[5,   645] train_loss: 0.176 train_accuracy: 0.925 test_accuracy: 0.899\n",
            "[5,   650] train_loss: 0.319 train_accuracy: 0.927 test_accuracy: 0.899\n",
            "[5,   655] train_loss: 0.156 train_accuracy: 0.919 test_accuracy: 0.894\n",
            "[5,   660] train_loss: 0.365 train_accuracy: 0.914 test_accuracy: 0.891\n",
            "[5,   665] train_loss: 0.127 train_accuracy: 0.919 test_accuracy: 0.894\n",
            "[5,   670] train_loss: 0.195 train_accuracy: 0.919 test_accuracy: 0.891\n",
            "[5,   675] train_loss: 0.489 train_accuracy: 0.924 test_accuracy: 0.897\n",
            "[5,   680] train_loss: 0.215 train_accuracy: 0.928 test_accuracy: 0.900\n",
            "[5,   685] train_loss: 0.144 train_accuracy: 0.921 test_accuracy: 0.896\n",
            "[5,   690] train_loss: 0.634 train_accuracy: 0.917 test_accuracy: 0.898\n",
            "[5,   695] train_loss: 0.671 train_accuracy: 0.914 test_accuracy: 0.904\n",
            "[5,   700] train_loss: 0.357 train_accuracy: 0.902 test_accuracy: 0.903\n",
            "[5,   705] train_loss: 0.397 train_accuracy: 0.894 test_accuracy: 0.898\n",
            "[5,   710] train_loss: 0.418 train_accuracy: 0.904 test_accuracy: 0.901\n",
            "[5,   715] train_loss: 0.224 train_accuracy: 0.914 test_accuracy: 0.901\n",
            "[5,   720] train_loss: 0.031 train_accuracy: 0.917 test_accuracy: 0.899\n",
            "[5,   725] train_loss: 0.116 train_accuracy: 0.915 test_accuracy: 0.892\n",
            "[5,   730] train_loss: 0.068 train_accuracy: 0.911 test_accuracy: 0.886\n",
            "[5,   735] train_loss: 0.331 train_accuracy: 0.914 test_accuracy: 0.890\n",
            "[5,   740] train_loss: 0.553 train_accuracy: 0.917 test_accuracy: 0.895\n",
            "[5,   745] train_loss: 0.214 train_accuracy: 0.914 test_accuracy: 0.903\n",
            "[5,   750] train_loss: 0.358 train_accuracy: 0.924 test_accuracy: 0.902\n",
            "[5,   755] train_loss: 0.422 train_accuracy: 0.922 test_accuracy: 0.890\n",
            "[5,   760] train_loss: 0.228 train_accuracy: 0.909 test_accuracy: 0.878\n",
            "[5,   765] train_loss: 0.359 train_accuracy: 0.905 test_accuracy: 0.874\n",
            "[5,   770] train_loss: 0.250 train_accuracy: 0.898 test_accuracy: 0.869\n",
            "[5,   775] train_loss: 0.384 train_accuracy: 0.883 test_accuracy: 0.855\n",
            "[5,   780] train_loss: 0.468 train_accuracy: 0.868 test_accuracy: 0.853\n",
            "[5,   785] train_loss: 0.323 train_accuracy: 0.880 test_accuracy: 0.873\n",
            "[5,   790] train_loss: 0.370 train_accuracy: 0.890 test_accuracy: 0.879\n",
            "[5,   795] train_loss: 0.294 train_accuracy: 0.898 test_accuracy: 0.881\n",
            "[5,   800] train_loss: 0.390 train_accuracy: 0.896 test_accuracy: 0.876\n",
            "[5,   805] train_loss: 0.153 train_accuracy: 0.899 test_accuracy: 0.876\n",
            "[5,   810] train_loss: 0.176 train_accuracy: 0.905 test_accuracy: 0.879\n",
            "[5,   815] train_loss: 0.811 train_accuracy: 0.914 test_accuracy: 0.889\n",
            "[5,   820] train_loss: 0.372 train_accuracy: 0.918 test_accuracy: 0.893\n",
            "[5,   825] train_loss: 1.018 train_accuracy: 0.918 test_accuracy: 0.900\n",
            "[5,   830] train_loss: 0.177 train_accuracy: 0.916 test_accuracy: 0.903\n",
            "[5,   835] train_loss: 0.479 train_accuracy: 0.920 test_accuracy: 0.896\n",
            "[5,   840] train_loss: 0.097 train_accuracy: 0.893 test_accuracy: 0.862\n",
            "[5,   845] train_loss: 0.263 train_accuracy: 0.897 test_accuracy: 0.866\n",
            "[5,   850] train_loss: 0.252 train_accuracy: 0.896 test_accuracy: 0.867\n",
            "[5,   855] train_loss: 0.092 train_accuracy: 0.906 test_accuracy: 0.883\n",
            "[5,   860] train_loss: 0.310 train_accuracy: 0.914 test_accuracy: 0.896\n",
            "[5,   865] train_loss: 0.338 train_accuracy: 0.924 test_accuracy: 0.906\n",
            "[5,   870] train_loss: 0.407 train_accuracy: 0.889 test_accuracy: 0.886\n",
            "[5,   875] train_loss: 1.028 train_accuracy: 0.864 test_accuracy: 0.855\n",
            "[5,   880] train_loss: 0.262 train_accuracy: 0.876 test_accuracy: 0.855\n",
            "[5,   885] train_loss: 0.294 train_accuracy: 0.868 test_accuracy: 0.846\n",
            "[5,   890] train_loss: 0.360 train_accuracy: 0.869 test_accuracy: 0.847\n",
            "[5,   895] train_loss: 0.194 train_accuracy: 0.865 test_accuracy: 0.850\n",
            "[5,   900] train_loss: 0.584 train_accuracy: 0.882 test_accuracy: 0.869\n",
            "[5,   905] train_loss: 0.226 train_accuracy: 0.895 test_accuracy: 0.880\n",
            "[5,   910] train_loss: 0.311 train_accuracy: 0.905 test_accuracy: 0.884\n",
            "[5,   915] train_loss: 0.293 train_accuracy: 0.900 test_accuracy: 0.875\n",
            "[5,   920] train_loss: 0.417 train_accuracy: 0.892 test_accuracy: 0.863\n",
            "[5,   925] train_loss: 0.267 train_accuracy: 0.887 test_accuracy: 0.858\n",
            "[5,   930] train_loss: 0.304 train_accuracy: 0.886 test_accuracy: 0.859\n",
            "[5,   935] train_loss: 0.371 train_accuracy: 0.899 test_accuracy: 0.874\n",
            "[5,   940] train_loss: 0.396 train_accuracy: 0.901 test_accuracy: 0.879\n",
            "[5,   945] train_loss: 0.058 train_accuracy: 0.899 test_accuracy: 0.879\n",
            "[5,   950] train_loss: 0.378 train_accuracy: 0.907 test_accuracy: 0.880\n",
            "[5,   955] train_loss: 0.182 train_accuracy: 0.904 test_accuracy: 0.872\n",
            "[5,   960] train_loss: 0.083 train_accuracy: 0.847 test_accuracy: 0.813\n",
            "[5,   965] train_loss: 0.835 train_accuracy: 0.903 test_accuracy: 0.874\n",
            "[5,   970] train_loss: 0.231 train_accuracy: 0.912 test_accuracy: 0.892\n",
            "[5,   975] train_loss: 0.447 train_accuracy: 0.896 test_accuracy: 0.886\n",
            "[5,   980] train_loss: 0.037 train_accuracy: 0.878 test_accuracy: 0.877\n",
            "[5,   985] train_loss: 0.608 train_accuracy: 0.879 test_accuracy: 0.877\n",
            "[5,   990] train_loss: 0.529 train_accuracy: 0.871 test_accuracy: 0.871\n",
            "[5,   995] train_loss: 0.761 train_accuracy: 0.862 test_accuracy: 0.864\n",
            "[5,  1000] train_loss: 0.826 train_accuracy: 0.886 test_accuracy: 0.873\n",
            "[5,  1005] train_loss: 0.150 train_accuracy: 0.882 test_accuracy: 0.863\n",
            "[5,  1010] train_loss: 0.371 train_accuracy: 0.895 test_accuracy: 0.871\n",
            "[5,  1015] train_loss: 0.250 train_accuracy: 0.899 test_accuracy: 0.865\n",
            "[5,  1020] train_loss: 0.126 train_accuracy: 0.896 test_accuracy: 0.859\n",
            "[5,  1025] train_loss: 0.302 train_accuracy: 0.895 test_accuracy: 0.859\n",
            "[5,  1030] train_loss: 0.202 train_accuracy: 0.898 test_accuracy: 0.864\n",
            "[5,  1035] train_loss: 0.261 train_accuracy: 0.892 test_accuracy: 0.861\n",
            "[5,  1040] train_loss: 0.021 train_accuracy: 0.875 test_accuracy: 0.852\n",
            "[5,  1045] train_loss: 0.743 train_accuracy: 0.882 test_accuracy: 0.860\n",
            "[5,  1050] train_loss: 0.479 train_accuracy: 0.859 test_accuracy: 0.836\n",
            "[5,  1055] train_loss: 0.479 train_accuracy: 0.835 test_accuracy: 0.812\n",
            "[5,  1060] train_loss: 0.596 train_accuracy: 0.842 test_accuracy: 0.820\n",
            "[5,  1065] train_loss: 0.825 train_accuracy: 0.852 test_accuracy: 0.826\n",
            "[5,  1070] train_loss: 0.096 train_accuracy: 0.861 test_accuracy: 0.832\n",
            "[5,  1075] train_loss: 0.353 train_accuracy: 0.859 test_accuracy: 0.830\n",
            "[5,  1080] train_loss: 0.300 train_accuracy: 0.854 test_accuracy: 0.829\n",
            "[5,  1085] train_loss: 1.024 train_accuracy: 0.861 test_accuracy: 0.841\n",
            "[5,  1090] train_loss: 0.686 train_accuracy: 0.870 test_accuracy: 0.846\n",
            "[5,  1095] train_loss: 0.276 train_accuracy: 0.884 test_accuracy: 0.863\n",
            "[5,  1100] train_loss: 0.278 train_accuracy: 0.896 test_accuracy: 0.872\n",
            "[5,  1105] train_loss: 0.374 train_accuracy: 0.899 test_accuracy: 0.873\n",
            "[5,  1110] train_loss: 0.522 train_accuracy: 0.900 test_accuracy: 0.877\n",
            "[5,  1115] train_loss: 0.593 train_accuracy: 0.893 test_accuracy: 0.876\n",
            "[5,  1120] train_loss: 0.347 train_accuracy: 0.877 test_accuracy: 0.872\n",
            "[5,  1125] train_loss: 0.446 train_accuracy: 0.888 test_accuracy: 0.867\n",
            "[5,  1130] train_loss: 0.124 train_accuracy: 0.880 test_accuracy: 0.850\n",
            "[5,  1135] train_loss: 0.620 train_accuracy: 0.883 test_accuracy: 0.854\n",
            "[5,  1140] train_loss: 0.381 train_accuracy: 0.886 test_accuracy: 0.858\n",
            "[5,  1145] train_loss: 0.173 train_accuracy: 0.890 test_accuracy: 0.861\n",
            "[5,  1150] train_loss: 0.046 train_accuracy: 0.878 test_accuracy: 0.863\n",
            "[5,  1155] train_loss: 0.262 train_accuracy: 0.868 test_accuracy: 0.866\n",
            "[5,  1160] train_loss: 0.261 train_accuracy: 0.879 test_accuracy: 0.874\n",
            "[5,  1165] train_loss: 0.375 train_accuracy: 0.903 test_accuracy: 0.882\n",
            "[5,  1170] train_loss: 0.305 train_accuracy: 0.913 test_accuracy: 0.890\n",
            "[5,  1175] train_loss: 0.237 train_accuracy: 0.870 test_accuracy: 0.857\n",
            "[5,  1180] train_loss: 0.439 train_accuracy: 0.899 test_accuracy: 0.886\n",
            "[5,  1185] train_loss: 0.182 train_accuracy: 0.901 test_accuracy: 0.888\n",
            "[5,  1190] train_loss: 0.360 train_accuracy: 0.905 test_accuracy: 0.890\n",
            "[5,  1195] train_loss: 0.070 train_accuracy: 0.910 test_accuracy: 0.893\n",
            "[5,  1200] train_loss: 0.435 train_accuracy: 0.917 test_accuracy: 0.896\n",
            "[5,  1205] train_loss: 0.608 train_accuracy: 0.894 test_accuracy: 0.864\n",
            "[5,  1210] train_loss: 0.376 train_accuracy: 0.873 test_accuracy: 0.844\n",
            "[5,  1215] train_loss: 0.378 train_accuracy: 0.853 test_accuracy: 0.824\n",
            "[5,  1220] train_loss: 0.347 train_accuracy: 0.849 test_accuracy: 0.816\n",
            "[5,  1225] train_loss: 0.738 train_accuracy: 0.836 test_accuracy: 0.801\n",
            "[5,  1230] train_loss: 0.952 train_accuracy: 0.835 test_accuracy: 0.800\n",
            "[5,  1235] train_loss: 0.851 train_accuracy: 0.864 test_accuracy: 0.832\n",
            "[5,  1240] train_loss: 0.306 train_accuracy: 0.868 test_accuracy: 0.836\n",
            "[5,  1245] train_loss: 0.118 train_accuracy: 0.868 test_accuracy: 0.841\n",
            "[5,  1250] train_loss: 0.911 train_accuracy: 0.892 test_accuracy: 0.867\n",
            "[5,  1255] train_loss: 0.094 train_accuracy: 0.903 test_accuracy: 0.880\n",
            "[5,  1260] train_loss: 0.219 train_accuracy: 0.902 test_accuracy: 0.872\n",
            "[5,  1265] train_loss: 0.674 train_accuracy: 0.901 test_accuracy: 0.871\n",
            "[5,  1270] train_loss: 0.086 train_accuracy: 0.902 test_accuracy: 0.873\n",
            "[5,  1275] train_loss: 0.275 train_accuracy: 0.909 test_accuracy: 0.882\n",
            "[5,  1280] train_loss: 0.336 train_accuracy: 0.915 test_accuracy: 0.887\n",
            "[5,  1285] train_loss: 0.069 train_accuracy: 0.916 test_accuracy: 0.888\n",
            "[5,  1290] train_loss: 0.419 train_accuracy: 0.909 test_accuracy: 0.881\n",
            "[5,  1295] train_loss: 0.407 train_accuracy: 0.912 test_accuracy: 0.884\n",
            "[5,  1300] train_loss: 0.243 train_accuracy: 0.917 test_accuracy: 0.889\n",
            "[5,  1305] train_loss: 0.455 train_accuracy: 0.910 test_accuracy: 0.890\n",
            "[5,  1310] train_loss: 0.310 train_accuracy: 0.895 test_accuracy: 0.878\n",
            "[5,  1315] train_loss: 0.349 train_accuracy: 0.881 test_accuracy: 0.865\n",
            "[5,  1320] train_loss: 0.122 train_accuracy: 0.871 test_accuracy: 0.854\n",
            "[5,  1325] train_loss: 0.095 train_accuracy: 0.866 test_accuracy: 0.847\n",
            "[5,  1330] train_loss: 0.137 train_accuracy: 0.872 test_accuracy: 0.850\n",
            "[5,  1335] train_loss: 0.819 train_accuracy: 0.881 test_accuracy: 0.853\n",
            "[5,  1340] train_loss: 0.451 train_accuracy: 0.881 test_accuracy: 0.853\n",
            "[5,  1345] train_loss: 0.279 train_accuracy: 0.892 test_accuracy: 0.864\n",
            "[5,  1350] train_loss: 0.181 train_accuracy: 0.899 test_accuracy: 0.869\n",
            "[5,  1355] train_loss: 0.621 train_accuracy: 0.900 test_accuracy: 0.868\n",
            "[5,  1360] train_loss: 0.202 train_accuracy: 0.906 test_accuracy: 0.877\n",
            "[5,  1365] train_loss: 0.792 train_accuracy: 0.905 test_accuracy: 0.879\n",
            "[5,  1370] train_loss: 0.185 train_accuracy: 0.901 test_accuracy: 0.877\n",
            "[5,  1375] train_loss: 0.286 train_accuracy: 0.904 test_accuracy: 0.879\n",
            "[5,  1380] train_loss: 0.241 train_accuracy: 0.910 test_accuracy: 0.886\n",
            "[5,  1385] train_loss: 0.098 train_accuracy: 0.913 test_accuracy: 0.890\n",
            "[5,  1390] train_loss: 0.141 train_accuracy: 0.920 test_accuracy: 0.893\n",
            "[5,  1395] train_loss: 0.728 train_accuracy: 0.922 test_accuracy: 0.894\n",
            "[5,  1400] train_loss: 0.059 train_accuracy: 0.923 test_accuracy: 0.897\n",
            "[5,  1405] train_loss: 0.285 train_accuracy: 0.919 test_accuracy: 0.900\n",
            "[5,  1410] train_loss: 0.232 train_accuracy: 0.898 test_accuracy: 0.902\n",
            "[5,  1415] train_loss: 0.338 train_accuracy: 0.879 test_accuracy: 0.901\n",
            "[5,  1420] train_loss: 0.208 train_accuracy: 0.874 test_accuracy: 0.901\n",
            "[5,  1425] train_loss: 0.591 train_accuracy: 0.911 test_accuracy: 0.907\n",
            "[5,  1430] train_loss: 0.214 train_accuracy: 0.931 test_accuracy: 0.911\n",
            "[5,  1435] train_loss: 0.117 train_accuracy: 0.936 test_accuracy: 0.912\n",
            "[5,  1440] train_loss: 0.739 train_accuracy: 0.936 test_accuracy: 0.911\n",
            "[5,  1445] train_loss: 0.075 train_accuracy: 0.933 test_accuracy: 0.907\n",
            "[5,  1450] train_loss: 0.092 train_accuracy: 0.932 test_accuracy: 0.905\n",
            "[5,  1455] train_loss: 0.341 train_accuracy: 0.931 test_accuracy: 0.910\n",
            "[5,  1460] train_loss: 0.469 train_accuracy: 0.928 test_accuracy: 0.912\n",
            "[5,  1465] train_loss: 0.190 train_accuracy: 0.920 test_accuracy: 0.914\n",
            "[5,  1470] train_loss: 0.023 train_accuracy: 0.914 test_accuracy: 0.913\n",
            "[5,  1475] train_loss: 0.659 train_accuracy: 0.895 test_accuracy: 0.905\n",
            "[5,  1480] train_loss: 0.547 train_accuracy: 0.892 test_accuracy: 0.896\n",
            "[5,  1485] train_loss: 0.531 train_accuracy: 0.909 test_accuracy: 0.905\n",
            "[5,  1490] train_loss: 0.216 train_accuracy: 0.906 test_accuracy: 0.894\n",
            "[5,  1495] train_loss: 0.337 train_accuracy: 0.899 test_accuracy: 0.887\n",
            "[5,  1500] train_loss: 0.289 train_accuracy: 0.897 test_accuracy: 0.886\n",
            "[5,  1505] train_loss: 0.213 train_accuracy: 0.896 test_accuracy: 0.885\n",
            "[5,  1510] train_loss: 0.630 train_accuracy: 0.897 test_accuracy: 0.890\n",
            "[5,  1515] train_loss: 0.535 train_accuracy: 0.890 test_accuracy: 0.875\n",
            "[5,  1520] train_loss: 0.578 train_accuracy: 0.870 test_accuracy: 0.855\n",
            "[5,  1525] train_loss: 0.655 train_accuracy: 0.871 test_accuracy: 0.852\n",
            "[5,  1530] train_loss: 0.351 train_accuracy: 0.876 test_accuracy: 0.853\n",
            "[5,  1535] train_loss: 0.560 train_accuracy: 0.891 test_accuracy: 0.864\n",
            "[5,  1540] train_loss: 0.881 train_accuracy: 0.909 test_accuracy: 0.880\n",
            "[5,  1545] train_loss: 0.222 train_accuracy: 0.929 test_accuracy: 0.901\n",
            "[5,  1550] train_loss: 0.463 train_accuracy: 0.931 test_accuracy: 0.908\n",
            "[5,  1555] train_loss: 0.257 train_accuracy: 0.929 test_accuracy: 0.909\n",
            "[5,  1560] train_loss: 0.144 train_accuracy: 0.929 test_accuracy: 0.905\n",
            "[5,  1565] train_loss: 0.264 train_accuracy: 0.928 test_accuracy: 0.901\n",
            "[5,  1570] train_loss: 0.501 train_accuracy: 0.915 test_accuracy: 0.890\n",
            "[5,  1575] train_loss: 0.114 train_accuracy: 0.900 test_accuracy: 0.879\n",
            "[5,  1580] train_loss: 0.286 train_accuracy: 0.911 test_accuracy: 0.888\n",
            "[5,  1585] train_loss: 0.579 train_accuracy: 0.920 test_accuracy: 0.896\n",
            "[5,  1590] train_loss: 0.288 train_accuracy: 0.926 test_accuracy: 0.900\n",
            "[5,  1595] train_loss: 0.222 train_accuracy: 0.917 test_accuracy: 0.897\n",
            "[5,  1600] train_loss: 0.759 train_accuracy: 0.913 test_accuracy: 0.895\n",
            "[5,  1605] train_loss: 0.394 train_accuracy: 0.921 test_accuracy: 0.897\n",
            "[5,  1610] train_loss: 0.078 train_accuracy: 0.920 test_accuracy: 0.894\n",
            "[5,  1615] train_loss: 0.230 train_accuracy: 0.917 test_accuracy: 0.889\n",
            "[5,  1620] train_loss: 0.299 train_accuracy: 0.915 test_accuracy: 0.885\n",
            "[5,  1625] train_loss: 0.035 train_accuracy: 0.909 test_accuracy: 0.882\n",
            "[5,  1630] train_loss: 0.614 train_accuracy: 0.920 test_accuracy: 0.893\n",
            "[5,  1635] train_loss: 0.237 train_accuracy: 0.925 test_accuracy: 0.902\n",
            "[5,  1640] train_loss: 0.395 train_accuracy: 0.930 test_accuracy: 0.907\n",
            "[5,  1645] train_loss: 0.394 train_accuracy: 0.931 test_accuracy: 0.911\n",
            "[5,  1650] train_loss: 0.206 train_accuracy: 0.930 test_accuracy: 0.911\n",
            "[5,  1655] train_loss: 0.160 train_accuracy: 0.929 test_accuracy: 0.908\n",
            "[5,  1660] train_loss: 0.551 train_accuracy: 0.929 test_accuracy: 0.908\n",
            "[5,  1665] train_loss: 0.242 train_accuracy: 0.928 test_accuracy: 0.905\n",
            "[5,  1670] train_loss: 0.297 train_accuracy: 0.928 test_accuracy: 0.902\n",
            "[5,  1675] train_loss: 0.179 train_accuracy: 0.926 test_accuracy: 0.899\n",
            "[5,  1680] train_loss: 0.102 train_accuracy: 0.922 test_accuracy: 0.898\n",
            "[5,  1685] train_loss: 0.355 train_accuracy: 0.927 test_accuracy: 0.902\n",
            "[5,  1690] train_loss: 0.322 train_accuracy: 0.933 test_accuracy: 0.908\n",
            "[5,  1695] train_loss: 0.185 train_accuracy: 0.931 test_accuracy: 0.909\n",
            "[5,  1700] train_loss: 0.473 train_accuracy: 0.928 test_accuracy: 0.907\n",
            "[5,  1705] train_loss: 0.048 train_accuracy: 0.923 test_accuracy: 0.908\n",
            "[5,  1710] train_loss: 0.337 train_accuracy: 0.916 test_accuracy: 0.903\n",
            "[5,  1715] train_loss: 0.554 train_accuracy: 0.917 test_accuracy: 0.903\n",
            "[5,  1720] train_loss: 0.241 train_accuracy: 0.921 test_accuracy: 0.908\n",
            "[5,  1725] train_loss: 0.238 train_accuracy: 0.925 test_accuracy: 0.909\n"
          ]
        }
      ],
      "source": [
        "import torch.nn as nn\n",
        "import torch.nn.functional as F\n",
        "from collections import Counter, defaultdict\n",
        "from itertools import combinations\n",
        "import random\n",
        "\n",
        "# Per-logging-step histories (appended every 5 mini-batches by the training loop below)\n",
        "a_train = []  # Accuracy on s_tra_image / s_tra_label\n",
        "a_test = []  # Accuracy on s_test_image / s_test_label\n",
        "lossaaa = []  # Mean selected loss over each 5-step logging window\n",
        "# Per-step histories of the generalization statistics (one 0-dim tensor per mini-batch)\n",
        "Inf = []  # Information values\n",
        "Var_all = []  # Variation values\n",
        "Generalization_Ratio_ = []  # Generalization ratios\n",
        "dicide_action = []  # Never appended to in this cell\n",
        "# State carried from one mini-batch to the next by the decision process\n",
        "loss_before = torch.tensor(30.0)  # Loss selected on the previous step (seed value 30.0)\n",
        "los = torch.tensor(30.0)  # Change in selected loss, loss - loss_before; the seed is positive\n",
        "dis_before_A1 = torch.tensor(0)  # Running score of action A1\n",
        "dis_before_A2 = torch.tensor(0)  # Running score of action A2\n",
        "dis = torch.tensor(1)  # Base increment applied to the action scores\n",
        "per = \"N\"  # Action taken on the previous step: 'A1', 'A2', or 'N' before the first step\n",
        "state_before = torch.tensor(0.0)  # Generalization ratio of the previous step\n",
        "categrary_number = 10  # Number of classes; used to normalise Information\n",
        "tra_val_number = 2  # Only used to compute weight_val_probility = 1.0 / tra_val_number\n",
        "\n",
        "for epoch in range(5):  # Loop over the dataset multiple times\n",
        "    e3412_iter = iter(e3412_loader)  # Fresh iterator each epoch; advanced once per e1234 batch\n",
        "    running_loss = 0.0  # Sum of selected losses since the last log line\n",
        "    running_loss_all = 0.0  # Not read anywhere else in this cell\n",
        "    # NOTE(review): Variable is presumably torch.autograd.Variable imported in an earlier cell.\n",
        "    # It has been deprecated since PyTorch 0.4; these wrappers look like they only strip\n",
        "    # autograd tracking from state carried over from the previous epoch - confirm before\n",
        "    # replacing them with .detach().\n",
        "    loss_before = Variable(loss_before, requires_grad=False)  # Previous step's selected loss\n",
        "    los = Variable(los, requires_grad=False)  # Previous loss change\n",
        "    state_before = Variable(state_before, requires_grad=False)  # Previous generalization ratio\n",
        "    dis_before_A1 = Variable(dis_before_A1, requires_grad=False)  # Score of action A1\n",
        "    dis_before_A2 = Variable(dis_before_A2, requires_grad=False)  # Score of action A2\n",
        "    acc_A1 = Variable(dis_before_A1, requires_grad=False)  # Not read anywhere else in this cell\n",
        "    acc_A2 = Variable(dis_before_A2, requires_grad=False)  # Not read anywhere else in this cell\n",
        "    dis_ = Variable(dis, requires_grad=False)  # Not read anywhere else in this cell\n",
        "\n",
        "    for step, (imgs, labels) in enumerate(e1234_loader):  # Iterate over the data\n",
        "        ### calculate losses\n",
        "        weight_val_probility = 1.0 / tra_val_number  # Not read anywhere else in this cell\n",
        "        labels = labels.cuda()  # Move labels to GPU\n",
        "        imgs = imgs.cuda()  # Move images to GPU\n",
        "        out_e1234 = net(imgs)  # Forward pass on the e1234 batch\n",
        "        loss_out_e1234 = loss_function(out_e1234, labels)  # Candidate loss 0; favoured 3:1 by action A1\n",
        "\n",
        "        # NOTE(review): next() raises StopIteration if e3412_loader yields fewer batches than\n",
        "        # e1234_loader - confirm the two loaders have the same length.\n",
        "        e3412_imgs, e3412_labels = next(e3412_iter)  # Paired batch from the second loader\n",
        "        e3412_imgs = e3412_imgs.cuda()  # Move e3412 images to GPU\n",
        "        e3412_labels = e3412_labels.cuda()  # Move e3412 labels to GPU\n",
        "        out_e3412 = net(e3412_imgs)  # Forward pass on the e3412 batch\n",
        "        loss_out_e3412 = loss_function(out_e3412, e3412_labels)  # Candidate loss 1; favoured 3:1 by action A2\n",
        "\n",
        "        #################### extract\n",
        "        # Predicted classes on four tensors defined outside this loop. Only the argmax is\n",
        "        # used, so each forward pass runs under torch.no_grad() rather than building an\n",
        "        # autograd graph and detaching it afterwards.\n",
        "        ## e1 ext count\n",
        "        e12_extracted_loader_image = e12_extracted_loader_image.cuda()  # Move e12 extracted images to GPU\n",
        "        e12_extracted_loader_label = e12_extracted_loader_label.cuda()  # Move e12 extracted labels to GPU\n",
        "        with torch.no_grad():\n",
        "            e12_extracted_out = net(e12_extracted_loader_image)\n",
        "        e12_extracted = torch.max(e12_extracted_out, dim=1)[1]  # Predicted class per sample\n",
        "\n",
        "        ## e2 ext count\n",
        "        e34_extracted_loader_image = e34_extracted_loader_image.cuda()  # Move e34 extracted images to GPU\n",
        "        e34_extracted_loader_label = e34_extracted_loader_label.cuda()  # Move e34 extracted labels to GPU\n",
        "        with torch.no_grad():\n",
        "            e34_extracted_out = net(e34_extracted_loader_image)\n",
        "        e34_extracted = torch.max(e34_extracted_out, dim=1)[1]  # Predicted class per sample\n",
        "\n",
        "        ########### e1 count\n",
        "        e12_s_loader_image = e12_s_loader_image.cuda()  # Move e12 sample images to GPU\n",
        "        e12_s_loader_label = e12_s_loader_label.cuda()  # Move e12 sample labels to GPU\n",
        "        with torch.no_grad():\n",
        "            e12_inform_out = net(e12_s_loader_image)\n",
        "        e12_inform = torch.max(e12_inform_out, dim=1)[1]  # Predicted class per sample\n",
        "        # Split the predictions into (up to) 10 chunks and replace each chunk by its\n",
        "        # class histogram, stored as a float tensor of length 10.\n",
        "        split_e12_inform = list(torch.chunk(e12_inform, 10))\n",
        "        for i in range(len(split_e12_inform)):\n",
        "            counts_E12 = torch.bincount(split_e12_inform[i], minlength=10)\n",
        "            # One bulk copy_ instead of ten per-element assignments; same values and dtype\n",
        "            counts_e12 = torch.zeros(10).copy_(counts_E12[:10])\n",
        "            split_e12_inform[i] = counts_e12\n",
        "\n",
        "        ########### e2 count\n",
        "        e34_s_loader_image = e34_s_loader_image.cuda()  # Move e34 sample images to GPU\n",
        "        e34_s_loader_label = e34_s_loader_label.cuda()  # Move e34 sample labels to GPU\n",
        "        with torch.no_grad():\n",
        "            e34_inform_out = net(e34_s_loader_image)\n",
        "        e34_inform = torch.max(e34_inform_out, dim=1)[1]  # Predicted class per sample\n",
        "        split_e34_inform = list(torch.chunk(e34_inform, 10))\n",
        "        for i in range(len(split_e34_inform)):\n",
        "            counts_E34 = torch.bincount(split_e34_inform[i], minlength=10)\n",
        "            counts_e34 = torch.zeros(10).copy_(counts_E34[:10])\n",
        "            split_e34_inform[i] = counts_e34\n",
        "\n",
        "        ##########\n",
        "        ## IN_IN_pro\n",
        "        # Turn predicted-class histograms into probability vectors of shape (1, 10).\n",
        "        # Each count vector is filled with one bulk copy_ instead of ten per-element\n",
        "        # assignments; the values and the dtype of torch.zeros(10) are unchanged.\n",
        "        counts_i_12 = torch.bincount(e12_inform, minlength=10)  # Histogram of e12 sample predictions\n",
        "        counts_i_34 = torch.bincount(e34_inform, minlength=10)  # Histogram of e34 sample predictions\n",
        "        counts_in_12 = torch.zeros(10).copy_(counts_i_12[:10])\n",
        "        counts_in_34 = torch.zeros(10).copy_(counts_i_34[:10])\n",
        "        total_samples_in_12 = len(e12_inform)  # Total samples in e12\n",
        "        total_samples_in_34 = len(e34_inform)  # Total samples in e34\n",
        "\n",
        "        # The denominator is total * (1 + 1e-6)\n",
        "        min_denominator_in_12 = total_samples_in_12 * 1e-6\n",
        "        probabilities_in_12 = counts_in_12 / (total_samples_in_12 + min_denominator_in_12)\n",
        "        probabilities_tensor_in_12 = probabilities_in_12.unsqueeze(0)\n",
        "\n",
        "        min_denominator_in_34 = total_samples_in_34 * 1e-6\n",
        "        probabilities_in_34 = counts_in_34 / (total_samples_in_34 + min_denominator_in_34)\n",
        "        probabilities_tensor_in_34 = probabilities_in_34.unsqueeze(0)\n",
        "\n",
        "        # Same computation for the predictions on the extracted sets\n",
        "        counts_ext_12 = torch.bincount(e12_extracted, minlength=10)  # Histogram of e12 extracted predictions\n",
        "        counts_ext_34 = torch.bincount(e34_extracted, minlength=10)  # Histogram of e34 extracted predictions\n",
        "        counts_extra_12 = torch.zeros(10).copy_(counts_ext_12[:10])\n",
        "        counts_extra_34 = torch.zeros(10).copy_(counts_ext_34[:10])\n",
        "        total_extracted_in_12 = len(e12_extracted)  # Total extracted samples in e12\n",
        "        total_extracted_in_34 = len(e34_extracted)  # Total extracted samples in e34\n",
        "\n",
        "        min_denominator_ext_12 = total_extracted_in_12 * 1e-6\n",
        "        probabilities_ext_12 = counts_extra_12 / (total_extracted_in_12 + min_denominator_ext_12)\n",
        "        probabilities_tensor_extra_12 = probabilities_ext_12.unsqueeze(0)\n",
        "\n",
        "        min_denominator_ext_34 = total_extracted_in_34 * 1e-6\n",
        "        probabilities_ext_34 = counts_extra_34 / (total_extracted_in_34 + min_denominator_ext_34)\n",
        "        probabilities_tensor_extra_34 = probabilities_ext_34.unsqueeze(0)\n",
        "\n",
        "        ############################### Variation x ###################################\n",
        "        # Normalise the four class-probability vectors by their equally weighted sum, then\n",
        "        # take the largest absolute per-class p * log(p / q) term as the Variation.\n",
        "        in_12_all = probabilities_tensor_in_12\n",
        "        in_34_all = probabilities_tensor_in_34\n",
        "        in_12_extra = probabilities_tensor_extra_12\n",
        "        in_34_extra = probabilities_tensor_extra_34\n",
        "        weight_tra_probility_all = 1.0 / 2\n",
        "        # 1e-30 keeps the denominator non-zero for classes that were never predicted\n",
        "        dow_all = (\n",
        "            in_12_all*weight_tra_probility_all + in_34_all*weight_tra_probility_all +\n",
        "            in_12_extra*weight_tra_probility_all + in_34_extra*weight_tra_probility_all + 1e-30\n",
        "        )\n",
        "        # [0] drops the leading batch dimension added by unsqueeze(0)\n",
        "        in_1_all = ((in_12_all*weight_tra_probility_all) / dow_all)[0]\n",
        "        in_2_all = ((in_34_all*weight_tra_probility_all) / dow_all)[0]\n",
        "        e_1_all = ((in_12_extra*weight_tra_probility_all) / dow_all)[0]\n",
        "        e_2_all = ((in_34_extra*weight_tra_probility_all) / dow_all)[0]\n",
        "        # NOTE(review): both terms below use in_1_all as the reference distribution and\n",
        "        # in_2_all is never read - confirm whether the second term should use in_2_all.\n",
        "        k_divergence_all = (in_1_all + 1e-30) * torch.log(in_1_all / (e_1_all + 1e-30) + 1e-30)\n",
        "        k_divergence_all_ = (in_1_all + 1e-30) * torch.log(in_1_all / (e_2_all + 1e-30) + 1e-30)\n",
        "        d_KL_all = torch.max(abs(k_divergence_all))\n",
        "        d_KL_all_ = torch.max(abs(k_divergence_all_))\n",
        "        Variation_all = torch.max(d_KL_all, d_KL_all_)\n",
        "        Var_all.append(Variation_all)\n",
        "\n",
        "        ############################ Information ###################################\n",
        "        # For every unordered class pair (i, j), store min over the two sample sets of\n",
        "        # |p_i * log(p_i / p_j)| in slot idx and the reversed (j, i) term in slot\n",
        "        # idx + len(all_combinations); Information is the sum of all slots divided by K.\n",
        "        all_combinations = list(combinations(range(10), 2))\n",
        "        K = categrary_number * (categrary_number - 1)\n",
        "        result_tensor = torch.zeros(len(all_combinations) * 2)  # One slot per ordered class pair\n",
        "        # NOTE(review): every pass of the `for c` loop assigns (not accumulates) the same\n",
        "        # slots, so only the last chunk (c == 9) contributes to Information - confirm whether\n",
        "        # accumulation over chunks was intended. The loop also assumes torch.chunk produced\n",
        "        # 10 chunks, and the /10 presumably assumes 10 samples per chunk - TODO confirm.\n",
        "        for c in range(10):\n",
        "            for idx, (i, j) in enumerate(all_combinations):\n",
        "                s1 = abs(((split_e12_inform[c][i]/10) + 1e-30) * torch.log((split_e12_inform[c][i]/10) / ((split_e12_inform[c][j]/10) + 1e-30) + 1e-30))\n",
        "                s3 = abs(((split_e34_inform[c][i]/10) + 1e-30) * torch.log((split_e34_inform[c][i]/10) / ((split_e34_inform[c][j]/10) + 1e-30) + 1e-30))\n",
        "                min_value = torch.min(s1, s3)\n",
        "                result_tensor[idx] = min_value.item()\n",
        "                idx_ = idx + len(all_combinations)\n",
        "                s1_ = abs(((split_e12_inform[c][j]/10) + 1e-30) * torch.log((split_e12_inform[c][j]/10) / ((split_e12_inform[c][i]/10) + 1e-30) + 1e-30))\n",
        "                s3_ = abs(((split_e34_inform[c][j]/10) + 1e-30) * torch.log((split_e34_inform[c][j]/10) / ((split_e34_inform[c][i]/10) + 1e-30) + 1e-30))\n",
        "                min_value_ = torch.min(s1_, s3_)\n",
        "                result_tensor[idx_] = min_value_.item()\n",
        "        Information = torch.sum(result_tensor) / K\n",
        "        Inf.append(Information)\n",
        "\n",
        "        ############################ Generalization_Ratio ###################################\n",
        "        # Variation * (Information + 1) / Information; there is no guard for Information == 0,\n",
        "        # in which case the division yields inf or nan.\n",
        "        Generalization_Ratio = Variation_all * (Information + 1.0) / Information\n",
        "        Generalization_Ratio_.append(Generalization_Ratio)\n",
        "\n",
        "        ############################ Generalization Decision Process (GDP) ###################################\n",
        "        # Chooses which of the two batch losses is backpropagated on this step.\n",
        "        #   A1: sample from (loss_out_e1234, loss_out_e3412) with weights 3:1\n",
        "        #   A2: sample from the same pair with weights 1:3\n",
        "        # dis_before_A1 / dis_before_A2 are running scores of the two actions. The action\n",
        "        # taken on the previous step (`per`) is rewarded or penalised according to how the\n",
        "        # generalization ratio and the selected loss moved; the action with the higher score\n",
        "        # is then taken. This replaces eight copy-pasted branches with the same behaviour.\n",
        "        state_now = Generalization_Ratio\n",
        "        loss_before = loss_before.cuda()\n",
        "        state_before = state_before.cuda()\n",
        "        state_dis = state_now - state_before\n",
        "        result_tensor = torch.cat((loss_out_e1234.unsqueeze(0), loss_out_e3412.unsqueeze(0)), 0)\n",
        "\n",
        "        # A NaN compares False in both tests, as it did in the original if/else chain.\n",
        "        ratio_not_decreasing = bool(state_dis >= 0.0)\n",
        "        loss_increased = bool(los > 0.0)  # True: 'Not fitting'; False: 'Overfitting'\n",
        "\n",
        "        if per in (\"A1\", \"A2\"):\n",
        "            # The score step is doubled when the loss did not increase.\n",
        "            score_step = dis if loss_increased else dis * 2\n",
        "            # The previous action is rewarded when both signals agree, penalised otherwise.\n",
        "            reward_previous = ratio_not_decreasing == loss_increased\n",
        "            if reward_previous == (per == \"A1\"):\n",
        "                dis_before_A1 += score_step\n",
        "                dis_before_A2 -= score_step\n",
        "            else:\n",
        "                dis_before_A1 -= score_step\n",
        "                dis_before_A2 += score_step\n",
        "            choose_A1 = bool(dis_before_A1 >= dis_before_A2)\n",
        "        else:\n",
        "            # No previous action yet (per == \"N\"): scores are not adjusted and A1 is taken.\n",
        "            choose_A1 = True\n",
        "\n",
        "        if choose_A1:\n",
        "            loss = random.choices(result_tensor, weights=[3, 1])[0]\n",
        "            dis_before_A1 = dis_before_A1 * (3 / 4)\n",
        "            dis_before_A2 = dis_before_A2 * (1 / 4)\n",
        "            per = \"A1\"\n",
        "        else:\n",
        "            loss = random.choices(result_tensor, weights=[1, 3])[0]\n",
        "            dis_before_A1 = dis_before_A1 * (1 / 4)\n",
        "            dis_before_A2 = dis_before_A2 * (3 / 4)\n",
        "            per = \"A2\"\n",
        "\n",
        "        ####################################\n",
        "        #### optimizer\n",
        "        # Record the signals the decision process reads on the next step, then take one\n",
        "        # optimisation step on the selected loss.\n",
        "        los = loss - loss_before  # Change in selected loss; its sign is tested on the next step\n",
        "        state_before = state_now  # Generalization ratio to compare against on the next step\n",
        "        optimizer_L.zero_grad()\n",
        "        loss.backward()\n",
        "        optimizer_L.step()\n",
        "        loss_before = loss  # Kept as a tensor (not .item()), so it still references this step's graph\n",
        "        running_loss += loss.item()  # Accumulated for the 5-step mean printed below\n",
        "\n",
        "        # Print statistics\n",
        "        if step % 5 == 4:  # Log every 5 mini-batches\n",
        "            # Evaluation only, so no autograd graph is needed.\n",
        "            # NOTE(review): net is not switched to eval() here - confirm it has no\n",
        "            # dropout / batch-norm layers whose behaviour differs in training mode.\n",
        "            with torch.no_grad():\n",
        "                s_test_image = s_test_image.cuda()\n",
        "                s_test_label = s_test_label.cuda()\n",
        "                s_tra_image = s_tra_image.cuda()\n",
        "                s_tra_label = s_tra_label.cuda()\n",
        "\n",
        "                outputs = net(s_test_image)  # [batch, 10]\n",
        "                predict_y = torch.max(outputs, dim=1)[1]\n",
        "                accuracy = torch.eq(predict_y, s_test_label).sum().item() / s_test_label.size(0)\n",
        "                a_test.append(accuracy)\n",
        "\n",
        "                outputs_t = net(s_tra_image)  # [batch, 10]\n",
        "                predict_y_t = torch.max(outputs_t, dim=1)[1]\n",
        "                accuracy_t = torch.eq(predict_y_t, s_tra_label).sum().item() / s_tra_label.size(0)\n",
        "                a_train.append(accuracy_t)\n",
        "\n",
        "                mean_loss = running_loss / 5  # Mean selected loss over the 5-step window\n",
        "                lossaaa.append(mean_loss)\n",
        "                print('[%d, %5d] train_loss: %.3f train_accuracy: %.3f test_accuracy: %.3f' %\n",
        "                      (epoch + 1, step + 1, mean_loss, accuracy_t, accuracy))\n",
        "                running_loss = 0.0"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "V6nzH17rUF4p"
      },
      "outputs": [],
      "source": [
        "# Write each logged series to its own text file as a single ', '-separated line.\n",
        "for filename, values in (\n",
        "    ('Accuracy_a_train.txt', a_train),  # training accuracy\n",
        "    ('Accuracy_a_test.txt', a_test),  # test accuracy\n",
        "    ('Accuracy_loss.txt', lossaaa),  # loss values\n",
        "):\n",
        "    with open(filename, 'w') as file:\n",
        "        # join() places the separator between items only, so there is no trailing ', '\n",
        "        file.write(', '.join(str(value) for value in values))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "33MkiU8MULXN"
      },
      "outputs": [],
      "source": [
        "# Convert the per-step tensors to plain Python numbers\n",
        "Inf_list = [tensor.item() for tensor in Inf]\n",
        "Var_av_list = [tensor.item() for tensor in Var_all]\n",
        "Generalization_Ratio_list = [tensor.item() for tensor in Generalization_Ratio_]\n",
        "\n",
        "# Write each series to its own text file as a single ', '-separated line.\n",
        "for filename, values in (\n",
        "    ('Inf_OOD.txt', Inf_list),\n",
        "    ('Var_all_OOD.txt', Var_av_list),\n",
        "    ('Generalization_Ratio_list.txt', Generalization_Ratio_list),\n",
        "):\n",
        "    with open(filename, 'w') as file:\n",
        "        # join() places the separator between items only, so there is no trailing ', '\n",
        "        file.write(', '.join(str(value) for value in values))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "D0k9Uq6GUO2i"
      },
      "outputs": [],
      "source": [
        "import time\n",
        "import os  # Import the os module\n",
        "\n",
        "model_path = './Models/'  # Directory that receives the saved models\n",
        "os.makedirs(model_path, exist_ok=True)  # Create it on first use; an existing directory is fine\n",
        "\n",
        "# Minute-resolution local timestamp (YYYYMMDDHHMM) used to tag this run's file\n",
        "rq = time.strftime('%Y%m%d%H%M', time.localtime())\n",
        "\n",
        "# Save the training results\n",
        "current_model_path = ''.join((model_path, rq, \"_model.pkl\"))  # e.g. ./Models/<timestamp>_model.pkl\n",
        "# torch.save receives the module object itself here, not only its state_dict\n",
        "torch.save(net, current_model_path)\n",
        "print(\"Saved model file: \" + current_model_path)  # Report where the model was written"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Display the best value of each metric recorded during training\n",
        "for label, best in (\n",
        "    (\"train:\", max(a_train)),  # highest training accuracy\n",
        "    (\"test:\", max(a_test)),  # highest test accuracy\n",
        "    (\"loss:\", min(lossaaa)),  # lowest loss\n",
        "):\n",
        "    print(label, best)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "EAPHw9oW3P47",
        "outputId": "e774a6b1-101e-4976-e542-cf82bca1dbd9"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "train: 0.93616298811545\n",
            "test: 0.9158550396375991\n",
            "loss: 0.014266098896041513\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "4cIzbYIDUXt-",
        "outputId": "467eaeb4-fc7e-404c-a3dc-8a7e9e08007b"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "train: 0.92480181200453\n",
            "test: 0.9034\n"
          ]
        }
      ],
      "source": [
        "# Score the current `net` on one large batch from the training split and one from the test split.\n",
        "# Load training and testing datasets from specified directories and apply transformations\n",
        "trainset = datasets.ImageFolder(root='/content/colorized-MNIST/training', transform=transform)\n",
        "testset = datasets.ImageFolder(root='/content/colorized-MNIST/testing', transform=transform)\n",
        "# Create DataLoaders; only the first batch of each (up to 8830 / 10000 images) is evaluated below\n",
        "trainloader = torch.utils.data.DataLoader(trainset, batch_size=8830, shuffle=True, num_workers=0)\n",
        "t_loader = torch.utils.data.DataLoader(testset, batch_size=10000, shuffle=False, num_workers=0)\n",
        "\n",
        "# NOTE(review): if `net` contains dropout / batch-norm layers, net.eval() should be called first -- confirm\n",
        "# Inference only: disabling autograd avoids building and retaining a graph for these very large batches\n",
        "with torch.no_grad():\n",
        "    # Get the first batch of images and labels from the training DataLoader\n",
        "    trainloader_iter = iter(trainloader)\n",
        "    tl_image, tl_label = next(trainloader_iter)\n",
        "    # Move the training images and labels to the GPU\n",
        "    tl_image = tl_image.cuda()\n",
        "    tl_label = tl_label.cuda()\n",
        "    # Pass the training images through the neural network to get the outputs\n",
        "    tl_imageoutputs = net(tl_image)  # [batch, 10]\n",
        "    # The predicted label is the index of the largest output along dim=1\n",
        "    predict_y = torch.max(tl_imageoutputs, dim=1)[1]\n",
        "    # Fraction of predictions in this batch that match the true labels\n",
        "    accuracy = torch.eq(predict_y, tl_label).sum().item() / tl_label.size(0)\n",
        "    # Print the training accuracy\n",
        "    print(\"train:\", float(accuracy))\n",
        "\n",
        "    # Get the first batch of images and labels from the testing DataLoader\n",
        "    t_data_iter = iter(t_loader)\n",
        "    t_image, t_label = next(t_data_iter)\n",
        "    # Move the testing images and labels to the GPU\n",
        "    t_image = t_image.cuda()\n",
        "    t_label = t_label.cuda()\n",
        "    # Pass the testing images through the neural network to get the outputs\n",
        "    t_imageoutputs = net(t_image)  # [batch, 10]\n",
        "    # The predicted label is the index of the largest output along dim=1\n",
        "    predict = torch.max(t_imageoutputs, dim=1)[1]\n",
        "    # Fraction of predictions in this batch that match the true labels\n",
        "    accuracy_t = torch.eq(predict, t_label).sum().item() / t_label.size(0)\n",
        "    # Print the testing accuracy\n",
        "    print(\"test:\", float(accuracy_t))"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "aM03U63eUkX7"
      },
      "source": [
        "### **Network2: uses linear layers and activation layers (without GDP)**"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "CoimgzmgUm9s"
      },
      "outputs": [],
      "source": [
        "import argparse\n",
        "import os\n",
        "import numpy as np\n",
        "import torchvision.transforms as transforms\n",
        "from torchvision.utils import save_image\n",
        "from torch.utils.data import DataLoader\n",
        "from torchvision import datasets\n",
        "from torch.autograd import Variable\n",
        "import torch.nn as nn\n",
        "import torch\n",
        "\n",
        "class LANet(nn.Module):\n",
        "    def __init__(self):  # Initialization function\n",
        "        super(LANet, self).__init__()\n",
        "\n",
        "        self.fc1 = nn.Linear(3*28*28, 1000)  # Define the first fully connected layer\n",
        "        self.fc2 = nn.Linear(1000, 500)  # Define the second fully connected layer\n",
        "        self.fc3 = nn.Linear(500, 100)  # Define the third fully connected layer\n",
        "        self.fc4 = nn.Linear(100, 50)  # Define the fourth fully connected layer\n",
        "        self.fc5 = nn.Linear(50, 25)  # Define the fifth fully connected layer\n",
        "        self.fc6 = nn.Linear(25, 20)  # Define the sixth fully connected layer\n",
        "        self.fc7 = nn.Linear(20, 10)  # Define the seventh fully connected layer\n",
        "\n",
        "    def forward(self, x):  # Define the forward pass\n",
        "\n",
        "        x = x.view(-1, 3*28*28)  # Flatten the input tensor\n",
        "        x = F.relu(self.fc1(x))  # Apply ReLU activation after the first layer\n",
        "        x = self.fc2(x)  # Apply the second layer\n",
        "        x = F.relu(self.fc3(x))  # Apply ReLU activation after the third layer\n",
        "        x = self.fc4(x)  # Apply the fourth layer\n",
        "        x = F.relu(self.fc5(x))  # Apply ReLU activation after the fifth layer\n",
        "        x = F.relu(self.fc6(x))  # Apply ReLU activation after the sixth layer\n",
        "        x = self.fc7(x)  # Apply the seventh layer\n",
        "        return x  # Return the output\n",
        "\n",
        "# Build the Network2 model together with its loss criterion\n",
        "net = LANet()\n",
        "loss_function = nn.CrossEntropyLoss()\n",
        "# When CUDA is present, move both the model and the criterion onto the GPU\n",
        "if torch.cuda.is_available():\n",
        "    net, loss_function = net.cuda(), loss_function.cuda()\n",
        "# Adam over all LANet parameters with learning rate 0.001\n",
        "optimizer_L = torch.optim.Adam(params=net.parameters(), lr=0.001)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "czwqNQ7NU6xG",
        "outputId": "eb077310-d102-46e1-eadf-3b750f858747"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "[1,     5] train_loss: 2.319 train_accuracy: 0.109 test_accuracy: 0.118\n",
            "[1,    10] train_loss: 2.530 train_accuracy: 0.107 test_accuracy: 0.109\n",
            "[1,    15] train_loss: 2.318 train_accuracy: 0.111 test_accuracy: 0.121\n",
            "[1,    20] train_loss: 2.319 train_accuracy: 0.104 test_accuracy: 0.113\n",
            "[1,    25] train_loss: 2.325 train_accuracy: 0.101 test_accuracy: 0.108\n",
            "[1,    30] train_loss: 2.421 train_accuracy: 0.084 test_accuracy: 0.084\n",
            "[1,    35] train_loss: 2.363 train_accuracy: 0.099 test_accuracy: 0.109\n",
            "[1,    40] train_loss: 2.340 train_accuracy: 0.099 test_accuracy: 0.109\n",
            "[1,    45] train_loss: 2.337 train_accuracy: 0.108 test_accuracy: 0.117\n",
            "[1,    50] train_loss: 2.279 train_accuracy: 0.112 test_accuracy: 0.128\n",
            "[1,    55] train_loss: 2.318 train_accuracy: 0.107 test_accuracy: 0.118\n",
            "[1,    60] train_loss: 2.289 train_accuracy: 0.111 test_accuracy: 0.130\n",
            "[1,    65] train_loss: 2.290 train_accuracy: 0.102 test_accuracy: 0.115\n",
            "[1,    70] train_loss: 2.332 train_accuracy: 0.102 test_accuracy: 0.115\n",
            "[1,    75] train_loss: 2.362 train_accuracy: 0.105 test_accuracy: 0.118\n",
            "[1,    80] train_loss: 2.311 train_accuracy: 0.125 test_accuracy: 0.142\n",
            "[1,    85] train_loss: 2.220 train_accuracy: 0.112 test_accuracy: 0.124\n",
            "[1,    90] train_loss: 2.268 train_accuracy: 0.117 test_accuracy: 0.129\n",
            "[1,    95] train_loss: 2.203 train_accuracy: 0.133 test_accuracy: 0.147\n",
            "[1,   100] train_loss: 2.238 train_accuracy: 0.177 test_accuracy: 0.192\n",
            "[1,   105] train_loss: 2.241 train_accuracy: 0.138 test_accuracy: 0.147\n",
            "[1,   110] train_loss: 2.097 train_accuracy: 0.171 test_accuracy: 0.184\n",
            "[1,   115] train_loss: 2.145 train_accuracy: 0.127 test_accuracy: 0.104\n",
            "[1,   120] train_loss: 2.128 train_accuracy: 0.201 test_accuracy: 0.188\n",
            "[1,   125] train_loss: 2.322 train_accuracy: 0.206 test_accuracy: 0.206\n",
            "[1,   130] train_loss: 2.178 train_accuracy: 0.181 test_accuracy: 0.162\n",
            "[1,   135] train_loss: 2.227 train_accuracy: 0.152 test_accuracy: 0.164\n",
            "[1,   140] train_loss: 2.083 train_accuracy: 0.151 test_accuracy: 0.165\n",
            "[1,   145] train_loss: 2.025 train_accuracy: 0.214 test_accuracy: 0.237\n",
            "[1,   150] train_loss: 2.024 train_accuracy: 0.231 test_accuracy: 0.266\n",
            "[1,   155] train_loss: 2.162 train_accuracy: 0.244 test_accuracy: 0.258\n",
            "[1,   160] train_loss: 2.370 train_accuracy: 0.226 test_accuracy: 0.212\n",
            "[1,   165] train_loss: 2.186 train_accuracy: 0.209 test_accuracy: 0.215\n",
            "[1,   170] train_loss: 2.162 train_accuracy: 0.167 test_accuracy: 0.179\n",
            "[1,   175] train_loss: 2.122 train_accuracy: 0.186 test_accuracy: 0.210\n",
            "[1,   180] train_loss: 1.941 train_accuracy: 0.245 test_accuracy: 0.269\n",
            "[1,   185] train_loss: 2.116 train_accuracy: 0.223 test_accuracy: 0.245\n",
            "[1,   190] train_loss: 1.767 train_accuracy: 0.212 test_accuracy: 0.231\n",
            "[1,   195] train_loss: 1.839 train_accuracy: 0.214 test_accuracy: 0.235\n",
            "[1,   200] train_loss: 1.834 train_accuracy: 0.222 test_accuracy: 0.233\n",
            "[1,   205] train_loss: 2.565 train_accuracy: 0.216 test_accuracy: 0.211\n",
            "[1,   210] train_loss: 1.872 train_accuracy: 0.209 test_accuracy: 0.155\n",
            "[1,   215] train_loss: 1.691 train_accuracy: 0.255 test_accuracy: 0.227\n",
            "[1,   220] train_loss: 1.781 train_accuracy: 0.263 test_accuracy: 0.282\n",
            "[1,   225] train_loss: 1.816 train_accuracy: 0.243 test_accuracy: 0.257\n",
            "[1,   230] train_loss: 2.231 train_accuracy: 0.298 test_accuracy: 0.302\n",
            "[1,   235] train_loss: 1.940 train_accuracy: 0.304 test_accuracy: 0.328\n",
            "[1,   240] train_loss: 1.801 train_accuracy: 0.262 test_accuracy: 0.285\n",
            "[1,   245] train_loss: 1.600 train_accuracy: 0.279 test_accuracy: 0.309\n",
            "[1,   250] train_loss: 2.151 train_accuracy: 0.300 test_accuracy: 0.331\n",
            "[1,   255] train_loss: 1.711 train_accuracy: 0.267 test_accuracy: 0.299\n",
            "[1,   260] train_loss: 2.103 train_accuracy: 0.280 test_accuracy: 0.310\n",
            "[1,   265] train_loss: 1.746 train_accuracy: 0.321 test_accuracy: 0.350\n",
            "[1,   270] train_loss: 1.721 train_accuracy: 0.297 test_accuracy: 0.329\n",
            "[1,   275] train_loss: 1.752 train_accuracy: 0.275 test_accuracy: 0.313\n",
            "[1,   280] train_loss: 1.803 train_accuracy: 0.345 test_accuracy: 0.395\n",
            "[1,   285] train_loss: 2.124 train_accuracy: 0.297 test_accuracy: 0.327\n",
            "[1,   290] train_loss: 1.946 train_accuracy: 0.251 test_accuracy: 0.222\n",
            "[1,   295] train_loss: 2.053 train_accuracy: 0.291 test_accuracy: 0.261\n",
            "[1,   300] train_loss: 1.932 train_accuracy: 0.272 test_accuracy: 0.245\n",
            "[1,   305] train_loss: 1.857 train_accuracy: 0.290 test_accuracy: 0.239\n",
            "[1,   310] train_loss: 1.548 train_accuracy: 0.311 test_accuracy: 0.335\n",
            "[1,   315] train_loss: 1.823 train_accuracy: 0.348 test_accuracy: 0.383\n",
            "[1,   320] train_loss: 1.890 train_accuracy: 0.377 test_accuracy: 0.415\n",
            "[1,   325] train_loss: 1.750 train_accuracy: 0.345 test_accuracy: 0.364\n",
            "[1,   330] train_loss: 1.863 train_accuracy: 0.328 test_accuracy: 0.338\n",
            "[1,   335] train_loss: 1.892 train_accuracy: 0.377 test_accuracy: 0.375\n",
            "[1,   340] train_loss: 1.704 train_accuracy: 0.346 test_accuracy: 0.363\n",
            "[1,   345] train_loss: 2.086 train_accuracy: 0.353 test_accuracy: 0.391\n",
            "[1,   350] train_loss: 1.818 train_accuracy: 0.373 test_accuracy: 0.407\n",
            "[1,   355] train_loss: 1.413 train_accuracy: 0.374 test_accuracy: 0.400\n",
            "[1,   360] train_loss: 1.283 train_accuracy: 0.392 test_accuracy: 0.384\n",
            "[1,   365] train_loss: 1.299 train_accuracy: 0.391 test_accuracy: 0.405\n",
            "[1,   370] train_loss: 1.484 train_accuracy: 0.401 test_accuracy: 0.426\n",
            "[1,   375] train_loss: 1.784 train_accuracy: 0.427 test_accuracy: 0.448\n",
            "[1,   380] train_loss: 1.518 train_accuracy: 0.383 test_accuracy: 0.411\n",
            "[1,   385] train_loss: 1.491 train_accuracy: 0.368 test_accuracy: 0.403\n",
            "[1,   390] train_loss: 1.746 train_accuracy: 0.408 test_accuracy: 0.446\n",
            "[1,   395] train_loss: 1.820 train_accuracy: 0.407 test_accuracy: 0.449\n",
            "[1,   400] train_loss: 1.431 train_accuracy: 0.445 test_accuracy: 0.477\n",
            "[1,   405] train_loss: 1.479 train_accuracy: 0.407 test_accuracy: 0.424\n",
            "[1,   410] train_loss: 1.268 train_accuracy: 0.411 test_accuracy: 0.405\n",
            "[1,   415] train_loss: 1.203 train_accuracy: 0.446 test_accuracy: 0.481\n",
            "[1,   420] train_loss: 1.134 train_accuracy: 0.430 test_accuracy: 0.479\n",
            "[1,   425] train_loss: 1.779 train_accuracy: 0.446 test_accuracy: 0.484\n",
            "[1,   430] train_loss: 1.361 train_accuracy: 0.472 test_accuracy: 0.521\n",
            "[1,   435] train_loss: 1.451 train_accuracy: 0.427 test_accuracy: 0.478\n",
            "[1,   440] train_loss: 1.696 train_accuracy: 0.383 test_accuracy: 0.435\n",
            "[1,   445] train_loss: 1.414 train_accuracy: 0.478 test_accuracy: 0.514\n",
            "[1,   450] train_loss: 1.340 train_accuracy: 0.495 test_accuracy: 0.521\n",
            "[1,   455] train_loss: 1.150 train_accuracy: 0.470 test_accuracy: 0.497\n",
            "[1,   460] train_loss: 1.144 train_accuracy: 0.507 test_accuracy: 0.534\n",
            "[1,   465] train_loss: 1.257 train_accuracy: 0.510 test_accuracy: 0.550\n",
            "[1,   470] train_loss: 0.931 train_accuracy: 0.514 test_accuracy: 0.565\n",
            "[1,   475] train_loss: 1.219 train_accuracy: 0.506 test_accuracy: 0.553\n",
            "[1,   480] train_loss: 1.625 train_accuracy: 0.490 test_accuracy: 0.539\n",
            "[1,   485] train_loss: 1.353 train_accuracy: 0.468 test_accuracy: 0.495\n",
            "[1,   490] train_loss: 1.357 train_accuracy: 0.487 test_accuracy: 0.526\n",
            "[1,   495] train_loss: 1.283 train_accuracy: 0.452 test_accuracy: 0.498\n",
            "[1,   500] train_loss: 1.530 train_accuracy: 0.462 test_accuracy: 0.495\n",
            "[1,   505] train_loss: 1.301 train_accuracy: 0.434 test_accuracy: 0.460\n",
            "[1,   510] train_loss: 2.042 train_accuracy: 0.451 test_accuracy: 0.466\n",
            "[1,   515] train_loss: 1.754 train_accuracy: 0.456 test_accuracy: 0.467\n",
            "[1,   520] train_loss: 1.960 train_accuracy: 0.520 test_accuracy: 0.518\n",
            "[1,   525] train_loss: 1.385 train_accuracy: 0.492 test_accuracy: 0.505\n",
            "[1,   530] train_loss: 1.342 train_accuracy: 0.508 test_accuracy: 0.538\n",
            "[1,   535] train_loss: 1.754 train_accuracy: 0.500 test_accuracy: 0.522\n",
            "[1,   540] train_loss: 1.265 train_accuracy: 0.479 test_accuracy: 0.492\n",
            "[1,   545] train_loss: 1.494 train_accuracy: 0.519 test_accuracy: 0.533\n",
            "[1,   550] train_loss: 1.531 train_accuracy: 0.517 test_accuracy: 0.548\n",
            "[1,   555] train_loss: 1.301 train_accuracy: 0.491 test_accuracy: 0.538\n",
            "[1,   560] train_loss: 1.108 train_accuracy: 0.509 test_accuracy: 0.571\n",
            "[1,   565] train_loss: 1.366 train_accuracy: 0.512 test_accuracy: 0.578\n",
            "[1,   570] train_loss: 1.181 train_accuracy: 0.465 test_accuracy: 0.518\n",
            "[1,   575] train_loss: 1.107 train_accuracy: 0.515 test_accuracy: 0.565\n",
            "[1,   580] train_loss: 1.073 train_accuracy: 0.465 test_accuracy: 0.516\n",
            "[1,   585] train_loss: 1.287 train_accuracy: 0.530 test_accuracy: 0.587\n",
            "[1,   590] train_loss: 1.312 train_accuracy: 0.558 test_accuracy: 0.597\n",
            "[1,   595] train_loss: 1.225 train_accuracy: 0.544 test_accuracy: 0.584\n",
            "[1,   600] train_loss: 1.382 train_accuracy: 0.506 test_accuracy: 0.547\n",
            "[1,   605] train_loss: 1.590 train_accuracy: 0.493 test_accuracy: 0.514\n",
            "[1,   610] train_loss: 1.429 train_accuracy: 0.570 test_accuracy: 0.607\n",
            "[1,   615] train_loss: 0.891 train_accuracy: 0.510 test_accuracy: 0.537\n",
            "[1,   620] train_loss: 1.684 train_accuracy: 0.584 test_accuracy: 0.588\n",
            "[1,   625] train_loss: 1.159 train_accuracy: 0.542 test_accuracy: 0.585\n",
            "[1,   630] train_loss: 1.147 train_accuracy: 0.549 test_accuracy: 0.602\n",
            "[1,   635] train_loss: 1.182 train_accuracy: 0.586 test_accuracy: 0.630\n",
            "[1,   640] train_loss: 1.115 train_accuracy: 0.571 test_accuracy: 0.620\n",
            "[1,   645] train_loss: 1.063 train_accuracy: 0.476 test_accuracy: 0.522\n",
            "[1,   650] train_loss: 1.275 train_accuracy: 0.487 test_accuracy: 0.530\n",
            "[1,   655] train_loss: 1.106 train_accuracy: 0.466 test_accuracy: 0.523\n",
            "[1,   660] train_loss: 1.238 train_accuracy: 0.481 test_accuracy: 0.536\n",
            "[1,   665] train_loss: 1.591 train_accuracy: 0.550 test_accuracy: 0.615\n",
            "[1,   670] train_loss: 1.501 train_accuracy: 0.570 test_accuracy: 0.629\n",
            "[1,   675] train_loss: 1.394 train_accuracy: 0.581 test_accuracy: 0.628\n",
            "[1,   680] train_loss: 1.364 train_accuracy: 0.601 test_accuracy: 0.629\n",
            "[1,   685] train_loss: 1.108 train_accuracy: 0.625 test_accuracy: 0.645\n",
            "[1,   690] train_loss: 1.097 train_accuracy: 0.607 test_accuracy: 0.634\n",
            "[1,   695] train_loss: 1.322 train_accuracy: 0.579 test_accuracy: 0.620\n",
            "[1,   700] train_loss: 0.891 train_accuracy: 0.543 test_accuracy: 0.557\n",
            "[1,   705] train_loss: 1.563 train_accuracy: 0.629 test_accuracy: 0.642\n",
            "[1,   710] train_loss: 0.922 train_accuracy: 0.571 test_accuracy: 0.596\n",
            "[1,   715] train_loss: 1.287 train_accuracy: 0.573 test_accuracy: 0.598\n",
            "[1,   720] train_loss: 0.822 train_accuracy: 0.602 test_accuracy: 0.627\n",
            "[1,   725] train_loss: 0.942 train_accuracy: 0.621 test_accuracy: 0.648\n",
            "[1,   730] train_loss: 0.937 train_accuracy: 0.631 test_accuracy: 0.650\n",
            "[1,   735] train_loss: 0.745 train_accuracy: 0.627 test_accuracy: 0.639\n",
            "[1,   740] train_loss: 1.117 train_accuracy: 0.605 test_accuracy: 0.603\n",
            "[1,   745] train_loss: 1.316 train_accuracy: 0.620 test_accuracy: 0.641\n",
            "[1,   750] train_loss: 1.032 train_accuracy: 0.584 test_accuracy: 0.587\n",
            "[1,   755] train_loss: 0.879 train_accuracy: 0.555 test_accuracy: 0.548\n",
            "[1,   760] train_loss: 1.256 train_accuracy: 0.549 test_accuracy: 0.547\n",
            "[1,   765] train_loss: 1.460 train_accuracy: 0.519 test_accuracy: 0.529\n",
            "[1,   770] train_loss: 1.179 train_accuracy: 0.617 test_accuracy: 0.623\n",
            "[1,   775] train_loss: 1.236 train_accuracy: 0.564 test_accuracy: 0.582\n",
            "[1,   780] train_loss: 1.417 train_accuracy: 0.589 test_accuracy: 0.589\n",
            "[1,   785] train_loss: 1.599 train_accuracy: 0.624 test_accuracy: 0.637\n",
            "[1,   790] train_loss: 1.149 train_accuracy: 0.575 test_accuracy: 0.599\n",
            "[1,   795] train_loss: 1.216 train_accuracy: 0.596 test_accuracy: 0.618\n",
            "[1,   800] train_loss: 0.982 train_accuracy: 0.592 test_accuracy: 0.610\n",
            "[1,   805] train_loss: 0.984 train_accuracy: 0.593 test_accuracy: 0.590\n",
            "[1,   810] train_loss: 1.124 train_accuracy: 0.640 test_accuracy: 0.642\n",
            "[1,   815] train_loss: 0.768 train_accuracy: 0.620 test_accuracy: 0.628\n",
            "[1,   820] train_loss: 1.048 train_accuracy: 0.651 test_accuracy: 0.659\n",
            "[1,   825] train_loss: 0.949 train_accuracy: 0.661 test_accuracy: 0.718\n",
            "[1,   830] train_loss: 1.163 train_accuracy: 0.678 test_accuracy: 0.735\n",
            "[1,   835] train_loss: 0.850 train_accuracy: 0.655 test_accuracy: 0.717\n",
            "[1,   840] train_loss: 1.057 train_accuracy: 0.634 test_accuracy: 0.700\n",
            "[1,   845] train_loss: 0.914 train_accuracy: 0.679 test_accuracy: 0.702\n",
            "[1,   850] train_loss: 0.970 train_accuracy: 0.651 test_accuracy: 0.657\n",
            "[1,   855] train_loss: 0.974 train_accuracy: 0.669 test_accuracy: 0.661\n",
            "[1,   860] train_loss: 0.675 train_accuracy: 0.660 test_accuracy: 0.658\n",
            "[1,   865] train_loss: 1.042 train_accuracy: 0.671 test_accuracy: 0.677\n",
            "[1,   870] train_loss: 0.722 train_accuracy: 0.657 test_accuracy: 0.669\n",
            "[1,   875] train_loss: 1.086 train_accuracy: 0.639 test_accuracy: 0.646\n",
            "[1,   880] train_loss: 1.074 train_accuracy: 0.620 test_accuracy: 0.589\n",
            "[1,   885] train_loss: 1.358 train_accuracy: 0.636 test_accuracy: 0.609\n",
            "[1,   890] train_loss: 1.256 train_accuracy: 0.616 test_accuracy: 0.629\n",
            "[1,   895] train_loss: 1.235 train_accuracy: 0.635 test_accuracy: 0.634\n",
            "[1,   900] train_loss: 1.368 train_accuracy: 0.623 test_accuracy: 0.667\n",
            "[1,   905] train_loss: 1.152 train_accuracy: 0.636 test_accuracy: 0.677\n",
            "[1,   910] train_loss: 0.922 train_accuracy: 0.588 test_accuracy: 0.627\n",
            "[1,   915] train_loss: 0.890 train_accuracy: 0.643 test_accuracy: 0.672\n",
            "[1,   920] train_loss: 1.420 train_accuracy: 0.633 test_accuracy: 0.651\n",
            "[1,   925] train_loss: 1.386 train_accuracy: 0.602 test_accuracy: 0.617\n",
            "[1,   930] train_loss: 0.978 train_accuracy: 0.534 test_accuracy: 0.546\n",
            "[1,   935] train_loss: 0.842 train_accuracy: 0.551 test_accuracy: 0.571\n",
            "[1,   940] train_loss: 1.074 train_accuracy: 0.654 test_accuracy: 0.645\n",
            "[1,   945] train_loss: 1.119 train_accuracy: 0.686 test_accuracy: 0.684\n",
            "[1,   950] train_loss: 0.723 train_accuracy: 0.693 test_accuracy: 0.703\n",
            "[1,   955] train_loss: 0.757 train_accuracy: 0.692 test_accuracy: 0.698\n",
            "[1,   960] train_loss: 0.882 train_accuracy: 0.692 test_accuracy: 0.686\n",
            "[1,   965] train_loss: 0.631 train_accuracy: 0.639 test_accuracy: 0.641\n",
            "[1,   970] train_loss: 1.268 train_accuracy: 0.637 test_accuracy: 0.640\n",
            "[1,   975] train_loss: 1.655 train_accuracy: 0.541 test_accuracy: 0.551\n",
            "[1,   980] train_loss: 1.712 train_accuracy: 0.559 test_accuracy: 0.564\n",
            "[1,   985] train_loss: 1.114 train_accuracy: 0.582 test_accuracy: 0.603\n",
            "[1,   990] train_loss: 0.947 train_accuracy: 0.635 test_accuracy: 0.631\n",
            "[1,   995] train_loss: 1.243 train_accuracy: 0.593 test_accuracy: 0.591\n",
            "[1,  1000] train_loss: 1.333 train_accuracy: 0.626 test_accuracy: 0.603\n",
            "[1,  1005] train_loss: 0.795 train_accuracy: 0.659 test_accuracy: 0.644\n",
            "[1,  1010] train_loss: 0.844 train_accuracy: 0.681 test_accuracy: 0.677\n",
            "[1,  1015] train_loss: 1.309 train_accuracy: 0.670 test_accuracy: 0.681\n",
            "[1,  1020] train_loss: 0.960 train_accuracy: 0.650 test_accuracy: 0.679\n",
            "[1,  1025] train_loss: 1.268 train_accuracy: 0.661 test_accuracy: 0.693\n",
            "[1,  1030] train_loss: 0.710 train_accuracy: 0.693 test_accuracy: 0.726\n",
            "[1,  1035] train_loss: 0.824 train_accuracy: 0.716 test_accuracy: 0.744\n",
            "[1,  1040] train_loss: 0.623 train_accuracy: 0.717 test_accuracy: 0.737\n",
            "[1,  1045] train_loss: 0.576 train_accuracy: 0.706 test_accuracy: 0.720\n",
            "[1,  1050] train_loss: 1.085 train_accuracy: 0.670 test_accuracy: 0.682\n",
            "[1,  1055] train_loss: 1.084 train_accuracy: 0.673 test_accuracy: 0.682\n",
            "[1,  1060] train_loss: 0.683 train_accuracy: 0.697 test_accuracy: 0.700\n",
            "[1,  1065] train_loss: 0.612 train_accuracy: 0.716 test_accuracy: 0.738\n",
            "[1,  1070] train_loss: 1.038 train_accuracy: 0.716 test_accuracy: 0.738\n",
            "[1,  1075] train_loss: 1.348 train_accuracy: 0.721 test_accuracy: 0.743\n",
            "[1,  1080] train_loss: 1.175 train_accuracy: 0.694 test_accuracy: 0.700\n",
            "[1,  1085] train_loss: 0.890 train_accuracy: 0.661 test_accuracy: 0.682\n",
            "[1,  1090] train_loss: 0.791 train_accuracy: 0.654 test_accuracy: 0.680\n",
            "[1,  1095] train_loss: 1.270 train_accuracy: 0.700 test_accuracy: 0.729\n",
            "[1,  1100] train_loss: 1.279 train_accuracy: 0.672 test_accuracy: 0.682\n",
            "[1,  1105] train_loss: 0.942 train_accuracy: 0.685 test_accuracy: 0.654\n",
            "[1,  1110] train_loss: 0.985 train_accuracy: 0.695 test_accuracy: 0.665\n",
            "[1,  1115] train_loss: 1.186 train_accuracy: 0.739 test_accuracy: 0.735\n",
            "[1,  1120] train_loss: 0.849 train_accuracy: 0.727 test_accuracy: 0.762\n",
            "[1,  1125] train_loss: 0.758 train_accuracy: 0.714 test_accuracy: 0.773\n",
            "[1,  1130] train_loss: 0.840 train_accuracy: 0.699 test_accuracy: 0.767\n",
            "[1,  1135] train_loss: 1.106 train_accuracy: 0.720 test_accuracy: 0.777\n",
            "[1,  1140] train_loss: 0.370 train_accuracy: 0.710 test_accuracy: 0.753\n",
            "[1,  1145] train_loss: 0.893 train_accuracy: 0.709 test_accuracy: 0.732\n",
            "[1,  1150] train_loss: 0.436 train_accuracy: 0.673 test_accuracy: 0.674\n",
            "[1,  1155] train_loss: 0.674 train_accuracy: 0.688 test_accuracy: 0.690\n",
            "[1,  1160] train_loss: 0.803 train_accuracy: 0.713 test_accuracy: 0.722\n",
            "[1,  1165] train_loss: 0.983 train_accuracy: 0.663 test_accuracy: 0.711\n",
            "[1,  1170] train_loss: 0.685 train_accuracy: 0.695 test_accuracy: 0.735\n",
            "[1,  1175] train_loss: 0.855 train_accuracy: 0.686 test_accuracy: 0.734\n",
            "[1,  1180] train_loss: 0.965 train_accuracy: 0.642 test_accuracy: 0.670\n",
            "[1,  1185] train_loss: 0.534 train_accuracy: 0.629 test_accuracy: 0.659\n",
            "[1,  1190] train_loss: 0.974 train_accuracy: 0.641 test_accuracy: 0.651\n",
            "[1,  1195] train_loss: 0.497 train_accuracy: 0.681 test_accuracy: 0.678\n",
            "[1,  1200] train_loss: 1.029 train_accuracy: 0.712 test_accuracy: 0.715\n",
            "[1,  1205] train_loss: 1.185 train_accuracy: 0.659 test_accuracy: 0.644\n",
            "[1,  1210] train_loss: 1.248 train_accuracy: 0.640 test_accuracy: 0.637\n",
            "[1,  1215] train_loss: 1.289 train_accuracy: 0.683 test_accuracy: 0.694\n",
            "[1,  1220] train_loss: 1.041 train_accuracy: 0.667 test_accuracy: 0.695\n",
            "[1,  1225] train_loss: 0.970 train_accuracy: 0.672 test_accuracy: 0.697\n",
            "[1,  1230] train_loss: 0.949 train_accuracy: 0.666 test_accuracy: 0.682\n",
            "[1,  1235] train_loss: 1.158 train_accuracy: 0.689 test_accuracy: 0.701\n",
            "[1,  1240] train_loss: 0.868 train_accuracy: 0.716 test_accuracy: 0.710\n",
            "[1,  1245] train_loss: 0.818 train_accuracy: 0.708 test_accuracy: 0.699\n",
            "[1,  1250] train_loss: 0.549 train_accuracy: 0.715 test_accuracy: 0.719\n",
            "[1,  1255] train_loss: 1.038 train_accuracy: 0.744 test_accuracy: 0.760\n",
            "[1,  1260] train_loss: 0.557 train_accuracy: 0.739 test_accuracy: 0.761\n",
            "[1,  1265] train_loss: 0.954 train_accuracy: 0.734 test_accuracy: 0.747\n",
            "[1,  1270] train_loss: 0.710 train_accuracy: 0.738 test_accuracy: 0.756\n",
            "[1,  1275] train_loss: 0.930 train_accuracy: 0.750 test_accuracy: 0.761\n",
            "[1,  1280] train_loss: 1.231 train_accuracy: 0.751 test_accuracy: 0.746\n",
            "[1,  1285] train_loss: 0.831 train_accuracy: 0.758 test_accuracy: 0.760\n",
            "[1,  1290] train_loss: 0.869 train_accuracy: 0.743 test_accuracy: 0.770\n",
            "[1,  1295] train_loss: 1.028 train_accuracy: 0.758 test_accuracy: 0.795\n",
            "[1,  1300] train_loss: 0.689 train_accuracy: 0.719 test_accuracy: 0.750\n",
            "[1,  1305] train_loss: 0.790 train_accuracy: 0.697 test_accuracy: 0.729\n",
            "[1,  1310] train_loss: 0.861 train_accuracy: 0.689 test_accuracy: 0.712\n",
            "[1,  1315] train_loss: 0.734 train_accuracy: 0.679 test_accuracy: 0.697\n",
            "[1,  1320] train_loss: 0.854 train_accuracy: 0.714 test_accuracy: 0.711\n",
            "[1,  1325] train_loss: 0.377 train_accuracy: 0.709 test_accuracy: 0.702\n",
            "[1,  1330] train_loss: 0.682 train_accuracy: 0.702 test_accuracy: 0.711\n",
            "[1,  1335] train_loss: 0.819 train_accuracy: 0.734 test_accuracy: 0.757\n",
            "[1,  1340] train_loss: 0.664 train_accuracy: 0.756 test_accuracy: 0.772\n",
            "[1,  1345] train_loss: 0.772 train_accuracy: 0.776 test_accuracy: 0.782\n",
            "[1,  1350] train_loss: 0.858 train_accuracy: 0.762 test_accuracy: 0.779\n",
            "[1,  1355] train_loss: 0.897 train_accuracy: 0.775 test_accuracy: 0.783\n",
            "[1,  1360] train_loss: 0.703 train_accuracy: 0.767 test_accuracy: 0.769\n",
            "[1,  1365] train_loss: 0.670 train_accuracy: 0.746 test_accuracy: 0.742\n",
            "[1,  1370] train_loss: 0.991 train_accuracy: 0.761 test_accuracy: 0.749\n",
            "[1,  1375] train_loss: 0.646 train_accuracy: 0.750 test_accuracy: 0.744\n",
            "[1,  1380] train_loss: 0.735 train_accuracy: 0.761 test_accuracy: 0.776\n",
            "[1,  1385] train_loss: 0.784 train_accuracy: 0.773 test_accuracy: 0.782\n",
            "[1,  1390] train_loss: 1.043 train_accuracy: 0.768 test_accuracy: 0.765\n",
            "[1,  1395] train_loss: 1.104 train_accuracy: 0.748 test_accuracy: 0.738\n",
            "[1,  1400] train_loss: 0.599 train_accuracy: 0.686 test_accuracy: 0.671\n",
            "[1,  1405] train_loss: 0.516 train_accuracy: 0.682 test_accuracy: 0.660\n",
            "[1,  1410] train_loss: 0.736 train_accuracy: 0.707 test_accuracy: 0.695\n",
            "[1,  1415] train_loss: 0.814 train_accuracy: 0.698 test_accuracy: 0.729\n",
            "[1,  1420] train_loss: 1.110 train_accuracy: 0.679 test_accuracy: 0.725\n",
            "[1,  1425] train_loss: 1.234 train_accuracy: 0.740 test_accuracy: 0.771\n",
            "[1,  1430] train_loss: 0.894 train_accuracy: 0.693 test_accuracy: 0.740\n",
            "[1,  1435] train_loss: 0.629 train_accuracy: 0.770 test_accuracy: 0.774\n",
            "[1,  1440] train_loss: 0.722 train_accuracy: 0.780 test_accuracy: 0.789\n",
            "[1,  1445] train_loss: 0.753 train_accuracy: 0.736 test_accuracy: 0.766\n",
            "[1,  1450] train_loss: 0.472 train_accuracy: 0.701 test_accuracy: 0.735\n",
            "[1,  1455] train_loss: 1.047 train_accuracy: 0.736 test_accuracy: 0.757\n",
            "[1,  1460] train_loss: 0.797 train_accuracy: 0.747 test_accuracy: 0.764\n",
            "[1,  1465] train_loss: 0.687 train_accuracy: 0.704 test_accuracy: 0.721\n",
            "[1,  1470] train_loss: 0.361 train_accuracy: 0.748 test_accuracy: 0.743\n",
            "[1,  1475] train_loss: 0.845 train_accuracy: 0.782 test_accuracy: 0.778\n",
            "[1,  1480] train_loss: 1.004 train_accuracy: 0.802 test_accuracy: 0.812\n",
            "[1,  1485] train_loss: 0.462 train_accuracy: 0.794 test_accuracy: 0.807\n",
            "[1,  1490] train_loss: 0.942 train_accuracy: 0.787 test_accuracy: 0.792\n",
            "[1,  1495] train_loss: 0.541 train_accuracy: 0.789 test_accuracy: 0.790\n",
            "[1,  1500] train_loss: 0.835 train_accuracy: 0.797 test_accuracy: 0.789\n",
            "[1,  1505] train_loss: 0.680 train_accuracy: 0.802 test_accuracy: 0.795\n",
            "[1,  1510] train_loss: 1.181 train_accuracy: 0.814 test_accuracy: 0.815\n",
            "[1,  1515] train_loss: 0.663 train_accuracy: 0.807 test_accuracy: 0.813\n",
            "[1,  1520] train_loss: 0.653 train_accuracy: 0.782 test_accuracy: 0.791\n",
            "[1,  1525] train_loss: 0.550 train_accuracy: 0.780 test_accuracy: 0.788\n",
            "[1,  1530] train_loss: 0.522 train_accuracy: 0.765 test_accuracy: 0.746\n",
            "[1,  1535] train_loss: 0.898 train_accuracy: 0.732 test_accuracy: 0.729\n",
            "[1,  1540] train_loss: 0.585 train_accuracy: 0.745 test_accuracy: 0.748\n",
            "[1,  1545] train_loss: 1.431 train_accuracy: 0.709 test_accuracy: 0.726\n",
            "[1,  1550] train_loss: 0.696 train_accuracy: 0.781 test_accuracy: 0.778\n",
            "[1,  1555] train_loss: 1.072 train_accuracy: 0.791 test_accuracy: 0.787\n",
            "[1,  1560] train_loss: 0.402 train_accuracy: 0.788 test_accuracy: 0.794\n",
            "[1,  1565] train_loss: 0.555 train_accuracy: 0.785 test_accuracy: 0.815\n",
            "[1,  1570] train_loss: 0.603 train_accuracy: 0.751 test_accuracy: 0.795\n",
            "[1,  1575] train_loss: 0.366 train_accuracy: 0.761 test_accuracy: 0.783\n",
            "[1,  1580] train_loss: 0.387 train_accuracy: 0.773 test_accuracy: 0.786\n",
            "[1,  1585] train_loss: 0.738 train_accuracy: 0.757 test_accuracy: 0.790\n",
            "[1,  1590] train_loss: 0.874 train_accuracy: 0.756 test_accuracy: 0.765\n",
            "[1,  1595] train_loss: 0.590 train_accuracy: 0.704 test_accuracy: 0.715\n",
            "[1,  1600] train_loss: 0.750 train_accuracy: 0.721 test_accuracy: 0.713\n",
            "[1,  1605] train_loss: 0.903 train_accuracy: 0.744 test_accuracy: 0.725\n",
            "[1,  1610] train_loss: 0.753 train_accuracy: 0.745 test_accuracy: 0.719\n",
            "[1,  1615] train_loss: 0.458 train_accuracy: 0.736 test_accuracy: 0.713\n",
            "[1,  1620] train_loss: 0.669 train_accuracy: 0.749 test_accuracy: 0.742\n",
            "[1,  1625] train_loss: 0.905 train_accuracy: 0.758 test_accuracy: 0.761\n",
            "[1,  1630] train_loss: 0.637 train_accuracy: 0.760 test_accuracy: 0.775\n",
            "[1,  1635] train_loss: 0.779 train_accuracy: 0.778 test_accuracy: 0.797\n",
            "[1,  1640] train_loss: 0.871 train_accuracy: 0.767 test_accuracy: 0.792\n",
            "[1,  1645] train_loss: 0.405 train_accuracy: 0.771 test_accuracy: 0.806\n",
            "[1,  1650] train_loss: 0.779 train_accuracy: 0.766 test_accuracy: 0.791\n",
            "[1,  1655] train_loss: 0.827 train_accuracy: 0.778 test_accuracy: 0.792\n",
            "[1,  1660] train_loss: 0.789 train_accuracy: 0.776 test_accuracy: 0.769\n",
            "[1,  1665] train_loss: 0.479 train_accuracy: 0.794 test_accuracy: 0.780\n",
            "[1,  1670] train_loss: 0.542 train_accuracy: 0.806 test_accuracy: 0.786\n",
            "[1,  1675] train_loss: 0.530 train_accuracy: 0.802 test_accuracy: 0.795\n",
            "[1,  1680] train_loss: 0.568 train_accuracy: 0.831 test_accuracy: 0.824\n",
            "[1,  1685] train_loss: 0.440 train_accuracy: 0.806 test_accuracy: 0.809\n",
            "[1,  1690] train_loss: 0.763 train_accuracy: 0.798 test_accuracy: 0.806\n",
            "[1,  1695] train_loss: 0.408 train_accuracy: 0.800 test_accuracy: 0.795\n",
            "[1,  1700] train_loss: 0.466 train_accuracy: 0.813 test_accuracy: 0.799\n",
            "[1,  1705] train_loss: 0.563 train_accuracy: 0.770 test_accuracy: 0.750\n",
            "[1,  1710] train_loss: 0.697 train_accuracy: 0.709 test_accuracy: 0.674\n",
            "[1,  1715] train_loss: 0.774 train_accuracy: 0.786 test_accuracy: 0.760\n",
            "[1,  1720] train_loss: 0.569 train_accuracy: 0.805 test_accuracy: 0.788\n",
            "[1,  1725] train_loss: 0.464 train_accuracy: 0.787 test_accuracy: 0.775\n",
            "[2,     5] train_loss: 0.851 train_accuracy: 0.799 test_accuracy: 0.801\n",
            "[2,    10] train_loss: 0.356 train_accuracy: 0.764 test_accuracy: 0.776\n",
            "[2,    15] train_loss: 0.845 train_accuracy: 0.764 test_accuracy: 0.755\n",
            "[2,    20] train_loss: 0.459 train_accuracy: 0.762 test_accuracy: 0.752\n",
            "[2,    25] train_loss: 0.343 train_accuracy: 0.757 test_accuracy: 0.752\n",
            "[2,    30] train_loss: 1.235 train_accuracy: 0.728 test_accuracy: 0.748\n",
            "[2,    35] train_loss: 0.982 train_accuracy: 0.756 test_accuracy: 0.774\n",
            "[2,    40] train_loss: 0.472 train_accuracy: 0.801 test_accuracy: 0.815\n",
            "[2,    45] train_loss: 0.758 train_accuracy: 0.766 test_accuracy: 0.790\n",
            "[2,    50] train_loss: 0.895 train_accuracy: 0.726 test_accuracy: 0.749\n",
            "[2,    55] train_loss: 0.936 train_accuracy: 0.786 test_accuracy: 0.788\n",
            "[2,    60] train_loss: 0.788 train_accuracy: 0.793 test_accuracy: 0.792\n",
            "[2,    65] train_loss: 1.393 train_accuracy: 0.787 test_accuracy: 0.785\n",
            "[2,    70] train_loss: 0.522 train_accuracy: 0.760 test_accuracy: 0.765\n",
            "[2,    75] train_loss: 0.543 train_accuracy: 0.754 test_accuracy: 0.758\n",
            "[2,    80] train_loss: 1.153 train_accuracy: 0.790 test_accuracy: 0.776\n",
            "[2,    85] train_loss: 0.794 train_accuracy: 0.814 test_accuracy: 0.795\n",
            "[2,    90] train_loss: 0.362 train_accuracy: 0.801 test_accuracy: 0.786\n",
            "[2,    95] train_loss: 0.796 train_accuracy: 0.792 test_accuracy: 0.784\n",
            "[2,   100] train_loss: 0.355 train_accuracy: 0.800 test_accuracy: 0.799\n",
            "[2,   105] train_loss: 0.825 train_accuracy: 0.800 test_accuracy: 0.815\n",
            "[2,   110] train_loss: 0.423 train_accuracy: 0.791 test_accuracy: 0.798\n",
            "[2,   115] train_loss: 0.454 train_accuracy: 0.778 test_accuracy: 0.773\n",
            "[2,   120] train_loss: 0.805 train_accuracy: 0.772 test_accuracy: 0.765\n",
            "[2,   125] train_loss: 0.694 train_accuracy: 0.769 test_accuracy: 0.780\n",
            "[2,   130] train_loss: 0.577 train_accuracy: 0.794 test_accuracy: 0.807\n",
            "[2,   135] train_loss: 0.140 train_accuracy: 0.807 test_accuracy: 0.814\n",
            "[2,   140] train_loss: 1.000 train_accuracy: 0.804 test_accuracy: 0.809\n",
            "[2,   145] train_loss: 0.491 train_accuracy: 0.788 test_accuracy: 0.806\n",
            "[2,   150] train_loss: 0.659 train_accuracy: 0.771 test_accuracy: 0.785\n",
            "[2,   155] train_loss: 0.686 train_accuracy: 0.808 test_accuracy: 0.802\n",
            "[2,   160] train_loss: 0.706 train_accuracy: 0.784 test_accuracy: 0.808\n",
            "[2,   165] train_loss: 0.771 train_accuracy: 0.748 test_accuracy: 0.786\n",
            "[2,   170] train_loss: 0.447 train_accuracy: 0.785 test_accuracy: 0.800\n",
            "[2,   175] train_loss: 0.323 train_accuracy: 0.809 test_accuracy: 0.810\n",
            "[2,   180] train_loss: 0.605 train_accuracy: 0.823 test_accuracy: 0.821\n",
            "[2,   185] train_loss: 0.850 train_accuracy: 0.828 test_accuracy: 0.840\n",
            "[2,   190] train_loss: 0.258 train_accuracy: 0.807 test_accuracy: 0.815\n",
            "[2,   195] train_loss: 0.975 train_accuracy: 0.799 test_accuracy: 0.794\n",
            "[2,   200] train_loss: 0.509 train_accuracy: 0.787 test_accuracy: 0.767\n",
            "[2,   205] train_loss: 0.977 train_accuracy: 0.787 test_accuracy: 0.765\n",
            "[2,   210] train_loss: 0.648 train_accuracy: 0.799 test_accuracy: 0.783\n",
            "[2,   215] train_loss: 0.438 train_accuracy: 0.797 test_accuracy: 0.786\n",
            "[2,   220] train_loss: 0.523 train_accuracy: 0.811 test_accuracy: 0.795\n",
            "[2,   225] train_loss: 0.543 train_accuracy: 0.824 test_accuracy: 0.807\n",
            "[2,   230] train_loss: 0.428 train_accuracy: 0.831 test_accuracy: 0.809\n",
            "[2,   235] train_loss: 0.415 train_accuracy: 0.834 test_accuracy: 0.821\n",
            "[2,   240] train_loss: 0.561 train_accuracy: 0.833 test_accuracy: 0.826\n",
            "[2,   245] train_loss: 0.646 train_accuracy: 0.792 test_accuracy: 0.793\n",
            "[2,   250] train_loss: 0.684 train_accuracy: 0.806 test_accuracy: 0.813\n",
            "[2,   255] train_loss: 0.739 train_accuracy: 0.839 test_accuracy: 0.826\n",
            "[2,   260] train_loss: 0.630 train_accuracy: 0.827 test_accuracy: 0.814\n",
            "[2,   265] train_loss: 0.913 train_accuracy: 0.813 test_accuracy: 0.805\n",
            "[2,   270] train_loss: 0.809 train_accuracy: 0.837 test_accuracy: 0.833\n",
            "[2,   275] train_loss: 0.766 train_accuracy: 0.832 test_accuracy: 0.826\n",
            "[2,   280] train_loss: 0.408 train_accuracy: 0.829 test_accuracy: 0.827\n",
            "[2,   285] train_loss: 0.430 train_accuracy: 0.831 test_accuracy: 0.830\n",
            "[2,   290] train_loss: 0.873 train_accuracy: 0.836 test_accuracy: 0.828\n",
            "[2,   295] train_loss: 0.461 train_accuracy: 0.829 test_accuracy: 0.817\n",
            "[2,   300] train_loss: 0.498 train_accuracy: 0.825 test_accuracy: 0.812\n",
            "[2,   305] train_loss: 0.780 train_accuracy: 0.845 test_accuracy: 0.836\n",
            "[2,   310] train_loss: 0.427 train_accuracy: 0.846 test_accuracy: 0.844\n",
            "[2,   315] train_loss: 0.597 train_accuracy: 0.837 test_accuracy: 0.831\n",
            "[2,   320] train_loss: 0.602 train_accuracy: 0.851 test_accuracy: 0.852\n",
            "[2,   325] train_loss: 0.273 train_accuracy: 0.847 test_accuracy: 0.855\n",
            "[2,   330] train_loss: 0.294 train_accuracy: 0.836 test_accuracy: 0.841\n",
            "[2,   335] train_loss: 0.653 train_accuracy: 0.839 test_accuracy: 0.835\n",
            "[2,   340] train_loss: 0.502 train_accuracy: 0.857 test_accuracy: 0.849\n",
            "[2,   345] train_loss: 0.245 train_accuracy: 0.832 test_accuracy: 0.823\n",
            "[2,   350] train_loss: 1.006 train_accuracy: 0.816 test_accuracy: 0.812\n",
            "[2,   355] train_loss: 0.301 train_accuracy: 0.798 test_accuracy: 0.775\n",
            "[2,   360] train_loss: 0.214 train_accuracy: 0.774 test_accuracy: 0.735\n",
            "[2,   365] train_loss: 0.782 train_accuracy: 0.776 test_accuracy: 0.784\n",
            "[2,   370] train_loss: 1.374 train_accuracy: 0.807 test_accuracy: 0.806\n",
            "[2,   375] train_loss: 0.628 train_accuracy: 0.760 test_accuracy: 0.746\n",
            "[2,   380] train_loss: 0.578 train_accuracy: 0.778 test_accuracy: 0.787\n",
            "[2,   385] train_loss: 0.324 train_accuracy: 0.821 test_accuracy: 0.831\n",
            "[2,   390] train_loss: 0.411 train_accuracy: 0.799 test_accuracy: 0.828\n",
            "[2,   395] train_loss: 1.180 train_accuracy: 0.799 test_accuracy: 0.819\n",
            "[2,   400] train_loss: 0.769 train_accuracy: 0.819 test_accuracy: 0.831\n",
            "[2,   405] train_loss: 0.541 train_accuracy: 0.822 test_accuracy: 0.827\n",
            "[2,   410] train_loss: 0.391 train_accuracy: 0.809 test_accuracy: 0.808\n",
            "[2,   415] train_loss: 0.870 train_accuracy: 0.809 test_accuracy: 0.802\n",
            "[2,   420] train_loss: 0.533 train_accuracy: 0.839 test_accuracy: 0.827\n",
            "[2,   425] train_loss: 0.611 train_accuracy: 0.811 test_accuracy: 0.805\n",
            "[2,   430] train_loss: 0.450 train_accuracy: 0.798 test_accuracy: 0.797\n",
            "[2,   435] train_loss: 0.627 train_accuracy: 0.782 test_accuracy: 0.790\n",
            "[2,   440] train_loss: 0.739 train_accuracy: 0.785 test_accuracy: 0.804\n",
            "[2,   445] train_loss: 0.925 train_accuracy: 0.817 test_accuracy: 0.820\n",
            "[2,   450] train_loss: 0.820 train_accuracy: 0.840 test_accuracy: 0.827\n",
            "[2,   455] train_loss: 0.528 train_accuracy: 0.818 test_accuracy: 0.798\n",
            "[2,   460] train_loss: 0.883 train_accuracy: 0.796 test_accuracy: 0.771\n",
            "[2,   465] train_loss: 0.484 train_accuracy: 0.811 test_accuracy: 0.793\n",
            "[2,   470] train_loss: 1.307 train_accuracy: 0.832 test_accuracy: 0.811\n",
            "[2,   475] train_loss: 0.487 train_accuracy: 0.819 test_accuracy: 0.801\n",
            "[2,   480] train_loss: 0.728 train_accuracy: 0.807 test_accuracy: 0.801\n",
            "[2,   485] train_loss: 0.581 train_accuracy: 0.806 test_accuracy: 0.794\n",
            "[2,   490] train_loss: 0.840 train_accuracy: 0.811 test_accuracy: 0.806\n",
            "[2,   495] train_loss: 0.862 train_accuracy: 0.823 test_accuracy: 0.816\n",
            "[2,   500] train_loss: 0.787 train_accuracy: 0.796 test_accuracy: 0.788\n",
            "[2,   505] train_loss: 0.941 train_accuracy: 0.806 test_accuracy: 0.789\n",
            "[2,   510] train_loss: 0.587 train_accuracy: 0.786 test_accuracy: 0.795\n",
            "[2,   515] train_loss: 0.722 train_accuracy: 0.786 test_accuracy: 0.808\n",
            "[2,   520] train_loss: 0.553 train_accuracy: 0.829 test_accuracy: 0.846\n",
            "[2,   525] train_loss: 0.535 train_accuracy: 0.843 test_accuracy: 0.852\n",
            "[2,   530] train_loss: 0.630 train_accuracy: 0.836 test_accuracy: 0.852\n",
            "[2,   535] train_loss: 0.954 train_accuracy: 0.859 test_accuracy: 0.860\n",
            "[2,   540] train_loss: 0.201 train_accuracy: 0.853 test_accuracy: 0.848\n",
            "[2,   545] train_loss: 0.750 train_accuracy: 0.846 test_accuracy: 0.837\n",
            "[2,   550] train_loss: 0.586 train_accuracy: 0.832 test_accuracy: 0.822\n",
            "[2,   555] train_loss: 0.619 train_accuracy: 0.844 test_accuracy: 0.843\n",
            "[2,   560] train_loss: 0.339 train_accuracy: 0.842 test_accuracy: 0.844\n",
            "[2,   565] train_loss: 0.374 train_accuracy: 0.815 test_accuracy: 0.833\n",
            "[2,   570] train_loss: 0.961 train_accuracy: 0.797 test_accuracy: 0.821\n",
            "[2,   575] train_loss: 0.530 train_accuracy: 0.863 test_accuracy: 0.860\n",
            "[2,   580] train_loss: 0.571 train_accuracy: 0.852 test_accuracy: 0.846\n",
            "[2,   585] train_loss: 0.482 train_accuracy: 0.824 test_accuracy: 0.813\n",
            "[2,   590] train_loss: 0.377 train_accuracy: 0.815 test_accuracy: 0.800\n",
            "[2,   595] train_loss: 0.922 train_accuracy: 0.848 test_accuracy: 0.834\n",
            "[2,   600] train_loss: 0.593 train_accuracy: 0.824 test_accuracy: 0.808\n",
            "[2,   605] train_loss: 0.887 train_accuracy: 0.795 test_accuracy: 0.794\n",
            "[2,   610] train_loss: 0.315 train_accuracy: 0.822 test_accuracy: 0.811\n",
            "[2,   615] train_loss: 0.420 train_accuracy: 0.828 test_accuracy: 0.814\n",
            "[2,   620] train_loss: 0.445 train_accuracy: 0.807 test_accuracy: 0.813\n",
            "[2,   625] train_loss: 0.456 train_accuracy: 0.824 test_accuracy: 0.821\n",
            "[2,   630] train_loss: 0.324 train_accuracy: 0.822 test_accuracy: 0.824\n",
            "[2,   635] train_loss: 0.424 train_accuracy: 0.789 test_accuracy: 0.808\n",
            "[2,   640] train_loss: 0.531 train_accuracy: 0.798 test_accuracy: 0.815\n",
            "[2,   645] train_loss: 0.288 train_accuracy: 0.840 test_accuracy: 0.837\n",
            "[2,   650] train_loss: 0.476 train_accuracy: 0.857 test_accuracy: 0.840\n",
            "[2,   655] train_loss: 0.354 train_accuracy: 0.834 test_accuracy: 0.826\n",
            "[2,   660] train_loss: 0.738 train_accuracy: 0.844 test_accuracy: 0.834\n",
            "[2,   665] train_loss: 0.679 train_accuracy: 0.840 test_accuracy: 0.834\n",
            "[2,   670] train_loss: 0.352 train_accuracy: 0.827 test_accuracy: 0.830\n",
            "[2,   675] train_loss: 0.530 train_accuracy: 0.846 test_accuracy: 0.827\n",
            "[2,   680] train_loss: 1.150 train_accuracy: 0.842 test_accuracy: 0.817\n",
            "[2,   685] train_loss: 0.981 train_accuracy: 0.838 test_accuracy: 0.807\n",
            "[2,   690] train_loss: 0.564 train_accuracy: 0.825 test_accuracy: 0.795\n",
            "[2,   695] train_loss: 0.704 train_accuracy: 0.817 test_accuracy: 0.794\n",
            "[2,   700] train_loss: 1.045 train_accuracy: 0.786 test_accuracy: 0.761\n",
            "[2,   705] train_loss: 0.755 train_accuracy: 0.821 test_accuracy: 0.805\n",
            "[2,   710] train_loss: 0.580 train_accuracy: 0.799 test_accuracy: 0.782\n",
            "[2,   715] train_loss: 0.983 train_accuracy: 0.798 test_accuracy: 0.788\n",
            "[2,   720] train_loss: 0.716 train_accuracy: 0.808 test_accuracy: 0.789\n",
            "[2,   725] train_loss: 0.617 train_accuracy: 0.823 test_accuracy: 0.806\n",
            "[2,   730] train_loss: 0.331 train_accuracy: 0.834 test_accuracy: 0.819\n",
            "[2,   735] train_loss: 0.998 train_accuracy: 0.815 test_accuracy: 0.806\n",
            "[2,   740] train_loss: 0.688 train_accuracy: 0.799 test_accuracy: 0.795\n",
            "[2,   745] train_loss: 0.590 train_accuracy: 0.811 test_accuracy: 0.812\n",
            "[2,   750] train_loss: 0.403 train_accuracy: 0.838 test_accuracy: 0.839\n",
            "[2,   755] train_loss: 0.262 train_accuracy: 0.843 test_accuracy: 0.855\n",
            "[2,   760] train_loss: 0.272 train_accuracy: 0.825 test_accuracy: 0.851\n",
            "[2,   765] train_loss: 0.943 train_accuracy: 0.840 test_accuracy: 0.857\n",
            "[2,   770] train_loss: 0.381 train_accuracy: 0.840 test_accuracy: 0.856\n",
            "[2,   775] train_loss: 0.740 train_accuracy: 0.859 test_accuracy: 0.850\n",
            "[2,   780] train_loss: 1.144 train_accuracy: 0.852 test_accuracy: 0.830\n",
            "[2,   785] train_loss: 0.446 train_accuracy: 0.850 test_accuracy: 0.836\n",
            "[2,   790] train_loss: 0.485 train_accuracy: 0.850 test_accuracy: 0.844\n",
            "[2,   795] train_loss: 0.419 train_accuracy: 0.855 test_accuracy: 0.851\n",
            "[2,   800] train_loss: 0.356 train_accuracy: 0.843 test_accuracy: 0.837\n",
            "[2,   805] train_loss: 0.923 train_accuracy: 0.842 test_accuracy: 0.841\n",
            "[2,   810] train_loss: 0.762 train_accuracy: 0.865 test_accuracy: 0.860\n",
            "[2,   815] train_loss: 0.352 train_accuracy: 0.851 test_accuracy: 0.845\n",
            "[2,   820] train_loss: 0.609 train_accuracy: 0.828 test_accuracy: 0.826\n",
            "[2,   825] train_loss: 0.436 train_accuracy: 0.851 test_accuracy: 0.840\n",
            "[2,   830] train_loss: 0.500 train_accuracy: 0.860 test_accuracy: 0.849\n",
            "[2,   835] train_loss: 0.578 train_accuracy: 0.835 test_accuracy: 0.834\n",
            "[2,   840] train_loss: 0.354 train_accuracy: 0.846 test_accuracy: 0.837\n",
            "[2,   845] train_loss: 0.915 train_accuracy: 0.844 test_accuracy: 0.843\n",
            "[2,   850] train_loss: 0.544 train_accuracy: 0.833 test_accuracy: 0.840\n",
            "[2,   855] train_loss: 0.777 train_accuracy: 0.834 test_accuracy: 0.847\n",
            "[2,   860] train_loss: 0.633 train_accuracy: 0.796 test_accuracy: 0.807\n",
            "[2,   865] train_loss: 0.706 train_accuracy: 0.826 test_accuracy: 0.829\n",
            "[2,   870] train_loss: 0.403 train_accuracy: 0.812 test_accuracy: 0.813\n",
            "[2,   875] train_loss: 0.318 train_accuracy: 0.818 test_accuracy: 0.815\n",
            "[2,   880] train_loss: 0.491 train_accuracy: 0.838 test_accuracy: 0.835\n",
            "[2,   885] train_loss: 0.445 train_accuracy: 0.854 test_accuracy: 0.858\n",
            "[2,   890] train_loss: 0.343 train_accuracy: 0.860 test_accuracy: 0.874\n",
            "[2,   895] train_loss: 0.637 train_accuracy: 0.828 test_accuracy: 0.853\n",
            "[2,   900] train_loss: 0.940 train_accuracy: 0.811 test_accuracy: 0.833\n",
            "[2,   905] train_loss: 0.947 train_accuracy: 0.835 test_accuracy: 0.844\n",
            "[2,   910] train_loss: 0.385 train_accuracy: 0.837 test_accuracy: 0.831\n",
            "[2,   915] train_loss: 0.644 train_accuracy: 0.845 test_accuracy: 0.835\n",
            "[2,   920] train_loss: 0.246 train_accuracy: 0.840 test_accuracy: 0.835\n",
            "[2,   925] train_loss: 0.500 train_accuracy: 0.840 test_accuracy: 0.836\n",
            "[2,   930] train_loss: 0.725 train_accuracy: 0.842 test_accuracy: 0.840\n",
            "[2,   935] train_loss: 0.511 train_accuracy: 0.847 test_accuracy: 0.848\n",
            "[2,   940] train_loss: 0.405 train_accuracy: 0.827 test_accuracy: 0.840\n",
            "[2,   945] train_loss: 0.547 train_accuracy: 0.830 test_accuracy: 0.824\n",
            "[2,   950] train_loss: 0.831 train_accuracy: 0.829 test_accuracy: 0.820\n",
            "[2,   955] train_loss: 0.265 train_accuracy: 0.823 test_accuracy: 0.829\n",
            "[2,   960] train_loss: 0.481 train_accuracy: 0.838 test_accuracy: 0.836\n",
            "[2,   965] train_loss: 0.640 train_accuracy: 0.847 test_accuracy: 0.840\n",
            "[2,   970] train_loss: 0.396 train_accuracy: 0.853 test_accuracy: 0.849\n",
            "[2,   975] train_loss: 0.355 train_accuracy: 0.858 test_accuracy: 0.846\n",
            "[2,   980] train_loss: 0.423 train_accuracy: 0.862 test_accuracy: 0.845\n",
            "[2,   985] train_loss: 0.453 train_accuracy: 0.853 test_accuracy: 0.837\n",
            "[2,   990] train_loss: 0.413 train_accuracy: 0.865 test_accuracy: 0.842\n",
            "[2,   995] train_loss: 0.341 train_accuracy: 0.873 test_accuracy: 0.851\n",
            "[2,  1000] train_loss: 0.729 train_accuracy: 0.875 test_accuracy: 0.856\n",
            "[2,  1005] train_loss: 0.563 train_accuracy: 0.873 test_accuracy: 0.853\n",
            "[2,  1010] train_loss: 0.488 train_accuracy: 0.867 test_accuracy: 0.848\n",
            "[2,  1015] train_loss: 0.940 train_accuracy: 0.862 test_accuracy: 0.857\n",
            "[2,  1020] train_loss: 0.648 train_accuracy: 0.835 test_accuracy: 0.839\n",
            "[2,  1025] train_loss: 0.533 train_accuracy: 0.848 test_accuracy: 0.837\n",
            "[2,  1030] train_loss: 0.578 train_accuracy: 0.850 test_accuracy: 0.838\n",
            "[2,  1035] train_loss: 0.586 train_accuracy: 0.859 test_accuracy: 0.848\n",
            "[2,  1040] train_loss: 0.594 train_accuracy: 0.862 test_accuracy: 0.848\n",
            "[2,  1045] train_loss: 0.651 train_accuracy: 0.863 test_accuracy: 0.848\n",
            "[2,  1050] train_loss: 0.438 train_accuracy: 0.864 test_accuracy: 0.854\n",
            "[2,  1055] train_loss: 0.346 train_accuracy: 0.842 test_accuracy: 0.848\n",
            "[2,  1060] train_loss: 0.677 train_accuracy: 0.835 test_accuracy: 0.836\n",
            "[2,  1065] train_loss: 0.391 train_accuracy: 0.842 test_accuracy: 0.830\n",
            "[2,  1070] train_loss: 0.364 train_accuracy: 0.842 test_accuracy: 0.831\n",
            "[2,  1075] train_loss: 0.479 train_accuracy: 0.855 test_accuracy: 0.841\n",
            "[2,  1080] train_loss: 0.750 train_accuracy: 0.825 test_accuracy: 0.813\n",
            "[2,  1085] train_loss: 0.567 train_accuracy: 0.817 test_accuracy: 0.814\n",
            "[2,  1090] train_loss: 0.370 train_accuracy: 0.838 test_accuracy: 0.836\n",
            "[2,  1095] train_loss: 0.325 train_accuracy: 0.844 test_accuracy: 0.849\n",
            "[2,  1100] train_loss: 0.296 train_accuracy: 0.832 test_accuracy: 0.834\n",
            "[2,  1105] train_loss: 0.490 train_accuracy: 0.819 test_accuracy: 0.823\n",
            "[2,  1110] train_loss: 0.575 train_accuracy: 0.835 test_accuracy: 0.833\n",
            "[2,  1115] train_loss: 0.396 train_accuracy: 0.848 test_accuracy: 0.837\n",
            "[2,  1120] train_loss: 0.538 train_accuracy: 0.870 test_accuracy: 0.848\n",
            "[2,  1125] train_loss: 0.544 train_accuracy: 0.845 test_accuracy: 0.829\n",
            "[2,  1130] train_loss: 0.741 train_accuracy: 0.794 test_accuracy: 0.779\n",
            "[2,  1135] train_loss: 0.847 train_accuracy: 0.766 test_accuracy: 0.753\n",
            "[2,  1140] train_loss: 0.845 train_accuracy: 0.786 test_accuracy: 0.771\n",
            "[2,  1145] train_loss: 0.639 train_accuracy: 0.799 test_accuracy: 0.800\n",
            "[2,  1150] train_loss: 0.867 train_accuracy: 0.800 test_accuracy: 0.806\n",
            "[2,  1155] train_loss: 0.675 train_accuracy: 0.817 test_accuracy: 0.810\n",
            "[2,  1160] train_loss: 0.398 train_accuracy: 0.846 test_accuracy: 0.823\n",
            "[2,  1165] train_loss: 0.767 train_accuracy: 0.853 test_accuracy: 0.831\n",
            "[2,  1170] train_loss: 0.585 train_accuracy: 0.855 test_accuracy: 0.837\n",
            "[2,  1175] train_loss: 0.393 train_accuracy: 0.841 test_accuracy: 0.820\n",
            "[2,  1180] train_loss: 0.809 train_accuracy: 0.845 test_accuracy: 0.820\n",
            "[2,  1185] train_loss: 0.724 train_accuracy: 0.846 test_accuracy: 0.823\n",
            "[2,  1190] train_loss: 0.552 train_accuracy: 0.824 test_accuracy: 0.801\n",
            "[2,  1195] train_loss: 0.287 train_accuracy: 0.838 test_accuracy: 0.819\n",
            "[2,  1200] train_loss: 0.162 train_accuracy: 0.841 test_accuracy: 0.828\n",
            "[2,  1205] train_loss: 0.945 train_accuracy: 0.847 test_accuracy: 0.829\n",
            "[2,  1210] train_loss: 0.386 train_accuracy: 0.818 test_accuracy: 0.798\n",
            "[2,  1215] train_loss: 0.457 train_accuracy: 0.848 test_accuracy: 0.835\n",
            "[2,  1220] train_loss: 0.277 train_accuracy: 0.870 test_accuracy: 0.860\n",
            "[2,  1225] train_loss: 0.700 train_accuracy: 0.869 test_accuracy: 0.853\n",
            "[2,  1230] train_loss: 0.341 train_accuracy: 0.849 test_accuracy: 0.839\n",
            "[2,  1235] train_loss: 0.368 train_accuracy: 0.857 test_accuracy: 0.843\n",
            "[2,  1240] train_loss: 0.763 train_accuracy: 0.869 test_accuracy: 0.851\n",
            "[2,  1245] train_loss: 0.222 train_accuracy: 0.876 test_accuracy: 0.867\n",
            "[2,  1250] train_loss: 0.466 train_accuracy: 0.867 test_accuracy: 0.861\n",
            "[2,  1255] train_loss: 0.458 train_accuracy: 0.859 test_accuracy: 0.865\n",
            "[2,  1260] train_loss: 0.608 train_accuracy: 0.860 test_accuracy: 0.866\n",
            "[2,  1265] train_loss: 0.386 train_accuracy: 0.864 test_accuracy: 0.855\n",
            "[2,  1270] train_loss: 0.406 train_accuracy: 0.830 test_accuracy: 0.824\n",
            "[2,  1275] train_loss: 0.278 train_accuracy: 0.827 test_accuracy: 0.818\n",
            "[2,  1280] train_loss: 0.946 train_accuracy: 0.860 test_accuracy: 0.846\n",
            "[2,  1285] train_loss: 0.353 train_accuracy: 0.857 test_accuracy: 0.854\n",
            "[2,  1290] train_loss: 0.329 train_accuracy: 0.842 test_accuracy: 0.847\n",
            "[2,  1295] train_loss: 0.495 train_accuracy: 0.857 test_accuracy: 0.852\n",
            "[2,  1300] train_loss: 0.757 train_accuracy: 0.854 test_accuracy: 0.844\n",
            "[2,  1305] train_loss: 0.757 train_accuracy: 0.862 test_accuracy: 0.853\n",
            "[2,  1310] train_loss: 0.595 train_accuracy: 0.844 test_accuracy: 0.825\n",
            "[2,  1315] train_loss: 0.956 train_accuracy: 0.769 test_accuracy: 0.739\n",
            "[2,  1320] train_loss: 0.537 train_accuracy: 0.757 test_accuracy: 0.730\n",
            "[2,  1325] train_loss: 0.986 train_accuracy: 0.851 test_accuracy: 0.832\n",
            "[2,  1330] train_loss: 0.517 train_accuracy: 0.807 test_accuracy: 0.775\n",
            "[2,  1335] train_loss: 0.486 train_accuracy: 0.765 test_accuracy: 0.748\n",
            "[2,  1340] train_loss: 0.485 train_accuracy: 0.746 test_accuracy: 0.740\n",
            "[2,  1345] train_loss: 0.967 train_accuracy: 0.790 test_accuracy: 0.783\n",
            "[2,  1350] train_loss: 0.301 train_accuracy: 0.764 test_accuracy: 0.739\n",
            "[2,  1355] train_loss: 1.139 train_accuracy: 0.833 test_accuracy: 0.816\n",
            "[2,  1360] train_loss: 0.483 train_accuracy: 0.849 test_accuracy: 0.834\n",
            "[2,  1365] train_loss: 0.413 train_accuracy: 0.836 test_accuracy: 0.825\n",
            "[2,  1370] train_loss: 0.952 train_accuracy: 0.852 test_accuracy: 0.842\n",
            "[2,  1375] train_loss: 0.387 train_accuracy: 0.866 test_accuracy: 0.852\n",
            "[2,  1380] train_loss: 0.502 train_accuracy: 0.873 test_accuracy: 0.852\n",
            "[2,  1385] train_loss: 0.296 train_accuracy: 0.876 test_accuracy: 0.855\n",
            "[2,  1390] train_loss: 0.412 train_accuracy: 0.867 test_accuracy: 0.856\n",
            "[2,  1395] train_loss: 0.193 train_accuracy: 0.846 test_accuracy: 0.840\n",
            "[2,  1400] train_loss: 0.690 train_accuracy: 0.854 test_accuracy: 0.841\n",
            "[2,  1405] train_loss: 0.781 train_accuracy: 0.859 test_accuracy: 0.838\n",
            "[2,  1410] train_loss: 0.295 train_accuracy: 0.854 test_accuracy: 0.832\n",
            "[2,  1415] train_loss: 0.477 train_accuracy: 0.841 test_accuracy: 0.824\n",
            "[2,  1420] train_loss: 0.422 train_accuracy: 0.826 test_accuracy: 0.816\n",
            "[2,  1425] train_loss: 0.202 train_accuracy: 0.830 test_accuracy: 0.818\n",
            "[2,  1430] train_loss: 0.276 train_accuracy: 0.832 test_accuracy: 0.814\n",
            "[2,  1435] train_loss: 0.274 train_accuracy: 0.829 test_accuracy: 0.814\n",
            "[2,  1440] train_loss: 0.368 train_accuracy: 0.826 test_accuracy: 0.815\n",
            "[2,  1445] train_loss: 1.012 train_accuracy: 0.831 test_accuracy: 0.824\n",
            "[2,  1450] train_loss: 0.586 train_accuracy: 0.849 test_accuracy: 0.845\n",
            "[2,  1455] train_loss: 0.333 train_accuracy: 0.834 test_accuracy: 0.823\n",
            "[2,  1460] train_loss: 0.819 train_accuracy: 0.786 test_accuracy: 0.784\n",
            "[2,  1465] train_loss: 0.854 train_accuracy: 0.762 test_accuracy: 0.780\n",
            "[2,  1470] train_loss: 0.964 train_accuracy: 0.702 test_accuracy: 0.707\n",
            "[2,  1475] train_loss: 1.376 train_accuracy: 0.733 test_accuracy: 0.761\n",
            "[2,  1480] train_loss: 0.252 train_accuracy: 0.755 test_accuracy: 0.781\n",
            "[2,  1485] train_loss: 1.096 train_accuracy: 0.780 test_accuracy: 0.812\n",
            "[2,  1490] train_loss: 0.240 train_accuracy: 0.809 test_accuracy: 0.833\n",
            "[2,  1495] train_loss: 0.983 train_accuracy: 0.836 test_accuracy: 0.845\n",
            "[2,  1500] train_loss: 0.485 train_accuracy: 0.761 test_accuracy: 0.795\n",
            "[2,  1505] train_loss: 0.699 train_accuracy: 0.713 test_accuracy: 0.736\n",
            "[2,  1510] train_loss: 0.910 train_accuracy: 0.743 test_accuracy: 0.752\n",
            "[2,  1515] train_loss: 0.431 train_accuracy: 0.814 test_accuracy: 0.817\n",
            "[2,  1520] train_loss: 0.442 train_accuracy: 0.852 test_accuracy: 0.851\n",
            "[2,  1525] train_loss: 0.555 train_accuracy: 0.868 test_accuracy: 0.857\n",
            "[2,  1530] train_loss: 0.364 train_accuracy: 0.842 test_accuracy: 0.839\n",
            "[2,  1535] train_loss: 0.328 train_accuracy: 0.804 test_accuracy: 0.810\n",
            "[2,  1540] train_loss: 0.943 train_accuracy: 0.830 test_accuracy: 0.820\n",
            "[2,  1545] train_loss: 0.705 train_accuracy: 0.821 test_accuracy: 0.815\n",
            "[2,  1550] train_loss: 0.727 train_accuracy: 0.807 test_accuracy: 0.802\n",
            "[2,  1555] train_loss: 0.864 train_accuracy: 0.778 test_accuracy: 0.789\n",
            "[2,  1560] train_loss: 0.446 train_accuracy: 0.799 test_accuracy: 0.819\n",
            "[2,  1565] train_loss: 0.675 train_accuracy: 0.800 test_accuracy: 0.821\n",
            "[2,  1570] train_loss: 0.812 train_accuracy: 0.837 test_accuracy: 0.837\n",
            "[2,  1575] train_loss: 0.984 train_accuracy: 0.844 test_accuracy: 0.834\n",
            "[2,  1580] train_loss: 0.821 train_accuracy: 0.839 test_accuracy: 0.825\n",
            "[2,  1585] train_loss: 0.449 train_accuracy: 0.811 test_accuracy: 0.781\n",
            "[2,  1590] train_loss: 0.820 train_accuracy: 0.796 test_accuracy: 0.759\n",
            "[2,  1595] train_loss: 0.573 train_accuracy: 0.804 test_accuracy: 0.772\n",
            "[2,  1600] train_loss: 0.586 train_accuracy: 0.860 test_accuracy: 0.835\n",
            "[2,  1605] train_loss: 0.453 train_accuracy: 0.867 test_accuracy: 0.840\n",
            "[2,  1610] train_loss: 0.900 train_accuracy: 0.864 test_accuracy: 0.847\n",
            "[2,  1615] train_loss: 0.421 train_accuracy: 0.860 test_accuracy: 0.847\n",
            "[2,  1620] train_loss: 0.140 train_accuracy: 0.856 test_accuracy: 0.850\n",
            "[2,  1625] train_loss: 0.484 train_accuracy: 0.850 test_accuracy: 0.849\n",
            "[2,  1630] train_loss: 0.428 train_accuracy: 0.842 test_accuracy: 0.849\n",
            "[2,  1635] train_loss: 0.799 train_accuracy: 0.863 test_accuracy: 0.860\n",
            "[2,  1640] train_loss: 0.222 train_accuracy: 0.881 test_accuracy: 0.870\n",
            "[2,  1645] train_loss: 0.443 train_accuracy: 0.890 test_accuracy: 0.872\n",
            "[2,  1650] train_loss: 0.514 train_accuracy: 0.880 test_accuracy: 0.868\n",
            "[2,  1655] train_loss: 0.284 train_accuracy: 0.861 test_accuracy: 0.845\n",
            "[2,  1660] train_loss: 0.362 train_accuracy: 0.859 test_accuracy: 0.841\n",
            "[2,  1665] train_loss: 0.644 train_accuracy: 0.872 test_accuracy: 0.859\n",
            "[2,  1670] train_loss: 0.319 train_accuracy: 0.864 test_accuracy: 0.857\n",
            "[2,  1675] train_loss: 0.534 train_accuracy: 0.845 test_accuracy: 0.846\n",
            "[2,  1680] train_loss: 0.564 train_accuracy: 0.855 test_accuracy: 0.849\n",
            "[2,  1685] train_loss: 0.574 train_accuracy: 0.859 test_accuracy: 0.846\n",
            "[2,  1690] train_loss: 0.499 train_accuracy: 0.825 test_accuracy: 0.820\n",
            "[2,  1695] train_loss: 0.704 train_accuracy: 0.848 test_accuracy: 0.836\n",
            "[2,  1700] train_loss: 0.519 train_accuracy: 0.861 test_accuracy: 0.847\n",
            "[2,  1705] train_loss: 0.709 train_accuracy: 0.870 test_accuracy: 0.853\n",
            "[2,  1710] train_loss: 0.220 train_accuracy: 0.874 test_accuracy: 0.849\n",
            "[2,  1715] train_loss: 0.325 train_accuracy: 0.872 test_accuracy: 0.845\n",
            "[2,  1720] train_loss: 0.756 train_accuracy: 0.871 test_accuracy: 0.843\n",
            "[2,  1725] train_loss: 0.195 train_accuracy: 0.861 test_accuracy: 0.836\n",
            "[3,     5] train_loss: 0.760 train_accuracy: 0.859 test_accuracy: 0.829\n",
            "[3,    10] train_loss: 0.725 train_accuracy: 0.863 test_accuracy: 0.848\n",
            "[3,    15] train_loss: 0.631 train_accuracy: 0.844 test_accuracy: 0.831\n",
            "[3,    20] train_loss: 0.344 train_accuracy: 0.847 test_accuracy: 0.825\n",
            "[3,    25] train_loss: 0.794 train_accuracy: 0.861 test_accuracy: 0.846\n",
            "[3,    30] train_loss: 0.593 train_accuracy: 0.867 test_accuracy: 0.871\n",
            "[3,    35] train_loss: 0.147 train_accuracy: 0.860 test_accuracy: 0.875\n",
            "[3,    40] train_loss: 1.195 train_accuracy: 0.854 test_accuracy: 0.868\n",
            "[3,    45] train_loss: 0.528 train_accuracy: 0.874 test_accuracy: 0.862\n",
            "[3,    50] train_loss: 0.483 train_accuracy: 0.871 test_accuracy: 0.855\n",
            "[3,    55] train_loss: 0.539 train_accuracy: 0.851 test_accuracy: 0.845\n",
            "[3,    60] train_loss: 0.292 train_accuracy: 0.850 test_accuracy: 0.846\n",
            "[3,    65] train_loss: 0.698 train_accuracy: 0.852 test_accuracy: 0.852\n",
            "[3,    70] train_loss: 0.974 train_accuracy: 0.873 test_accuracy: 0.869\n",
            "[3,    75] train_loss: 0.625 train_accuracy: 0.880 test_accuracy: 0.865\n",
            "[3,    80] train_loss: 0.287 train_accuracy: 0.866 test_accuracy: 0.856\n",
            "[3,    85] train_loss: 0.739 train_accuracy: 0.883 test_accuracy: 0.865\n",
            "[3,    90] train_loss: 0.225 train_accuracy: 0.877 test_accuracy: 0.849\n",
            "[3,    95] train_loss: 0.137 train_accuracy: 0.844 test_accuracy: 0.812\n",
            "[3,   100] train_loss: 0.356 train_accuracy: 0.832 test_accuracy: 0.812\n",
            "[3,   105] train_loss: 1.275 train_accuracy: 0.836 test_accuracy: 0.845\n",
            "[3,   110] train_loss: 0.571 train_accuracy: 0.841 test_accuracy: 0.853\n",
            "[3,   115] train_loss: 0.927 train_accuracy: 0.854 test_accuracy: 0.863\n",
            "[3,   120] train_loss: 0.609 train_accuracy: 0.874 test_accuracy: 0.860\n",
            "[3,   125] train_loss: 0.526 train_accuracy: 0.871 test_accuracy: 0.855\n",
            "[3,   130] train_loss: 0.468 train_accuracy: 0.869 test_accuracy: 0.847\n",
            "[3,   135] train_loss: 0.522 train_accuracy: 0.860 test_accuracy: 0.840\n",
            "[3,   140] train_loss: 0.456 train_accuracy: 0.871 test_accuracy: 0.853\n",
            "[3,   145] train_loss: 0.346 train_accuracy: 0.861 test_accuracy: 0.845\n",
            "[3,   150] train_loss: 0.748 train_accuracy: 0.871 test_accuracy: 0.859\n",
            "[3,   155] train_loss: 0.485 train_accuracy: 0.879 test_accuracy: 0.862\n",
            "[3,   160] train_loss: 0.127 train_accuracy: 0.870 test_accuracy: 0.849\n",
            "[3,   165] train_loss: 0.464 train_accuracy: 0.863 test_accuracy: 0.845\n",
            "[3,   170] train_loss: 0.522 train_accuracy: 0.876 test_accuracy: 0.862\n",
            "[3,   175] train_loss: 0.628 train_accuracy: 0.892 test_accuracy: 0.885\n",
            "[3,   180] train_loss: 0.174 train_accuracy: 0.896 test_accuracy: 0.896\n",
            "[3,   185] train_loss: 0.472 train_accuracy: 0.892 test_accuracy: 0.899\n",
            "[3,   190] train_loss: 0.419 train_accuracy: 0.888 test_accuracy: 0.899\n",
            "[3,   195] train_loss: 0.929 train_accuracy: 0.901 test_accuracy: 0.895\n",
            "[3,   200] train_loss: 0.618 train_accuracy: 0.880 test_accuracy: 0.864\n",
            "[3,   205] train_loss: 0.312 train_accuracy: 0.871 test_accuracy: 0.851\n",
            "[3,   210] train_loss: 0.396 train_accuracy: 0.872 test_accuracy: 0.845\n",
            "[3,   215] train_loss: 0.326 train_accuracy: 0.883 test_accuracy: 0.858\n",
            "[3,   220] train_loss: 0.389 train_accuracy: 0.855 test_accuracy: 0.839\n",
            "[3,   225] train_loss: 0.746 train_accuracy: 0.815 test_accuracy: 0.819\n",
            "[3,   230] train_loss: 0.450 train_accuracy: 0.848 test_accuracy: 0.850\n",
            "[3,   235] train_loss: 0.604 train_accuracy: 0.871 test_accuracy: 0.864\n",
            "[3,   240] train_loss: 0.210 train_accuracy: 0.851 test_accuracy: 0.844\n",
            "[3,   245] train_loss: 0.277 train_accuracy: 0.833 test_accuracy: 0.829\n",
            "[3,   250] train_loss: 0.407 train_accuracy: 0.845 test_accuracy: 0.837\n",
            "[3,   255] train_loss: 0.483 train_accuracy: 0.880 test_accuracy: 0.874\n",
            "[3,   260] train_loss: 0.257 train_accuracy: 0.875 test_accuracy: 0.873\n",
            "[3,   265] train_loss: 0.122 train_accuracy: 0.872 test_accuracy: 0.869\n",
            "[3,   270] train_loss: 0.275 train_accuracy: 0.880 test_accuracy: 0.872\n",
            "[3,   275] train_loss: 0.358 train_accuracy: 0.885 test_accuracy: 0.877\n",
            "[3,   280] train_loss: 0.643 train_accuracy: 0.886 test_accuracy: 0.877\n",
            "[3,   285] train_loss: 0.244 train_accuracy: 0.856 test_accuracy: 0.839\n",
            "[3,   290] train_loss: 0.650 train_accuracy: 0.836 test_accuracy: 0.805\n",
            "[3,   295] train_loss: 0.834 train_accuracy: 0.849 test_accuracy: 0.816\n",
            "[3,   300] train_loss: 1.038 train_accuracy: 0.884 test_accuracy: 0.860\n",
            "[3,   305] train_loss: 0.806 train_accuracy: 0.895 test_accuracy: 0.880\n",
            "[3,   310] train_loss: 0.584 train_accuracy: 0.887 test_accuracy: 0.876\n",
            "[3,   315] train_loss: 0.436 train_accuracy: 0.874 test_accuracy: 0.870\n",
            "[3,   320] train_loss: 0.489 train_accuracy: 0.880 test_accuracy: 0.870\n",
            "[3,   325] train_loss: 0.658 train_accuracy: 0.891 test_accuracy: 0.883\n",
            "[3,   330] train_loss: 0.306 train_accuracy: 0.903 test_accuracy: 0.891\n",
            "[3,   335] train_loss: 0.516 train_accuracy: 0.903 test_accuracy: 0.892\n",
            "[3,   340] train_loss: 0.729 train_accuracy: 0.894 test_accuracy: 0.881\n",
            "[3,   345] train_loss: 0.292 train_accuracy: 0.889 test_accuracy: 0.881\n",
            "[3,   350] train_loss: 0.274 train_accuracy: 0.892 test_accuracy: 0.885\n",
            "[3,   355] train_loss: 0.931 train_accuracy: 0.892 test_accuracy: 0.884\n",
            "[3,   360] train_loss: 0.302 train_accuracy: 0.866 test_accuracy: 0.856\n",
            "[3,   365] train_loss: 0.595 train_accuracy: 0.866 test_accuracy: 0.853\n",
            "[3,   370] train_loss: 0.220 train_accuracy: 0.870 test_accuracy: 0.858\n",
            "[3,   375] train_loss: 0.420 train_accuracy: 0.864 test_accuracy: 0.853\n",
            "[3,   380] train_loss: 0.411 train_accuracy: 0.871 test_accuracy: 0.863\n",
            "[3,   385] train_loss: 0.571 train_accuracy: 0.890 test_accuracy: 0.870\n",
            "[3,   390] train_loss: 0.262 train_accuracy: 0.895 test_accuracy: 0.877\n",
            "[3,   395] train_loss: 0.083 train_accuracy: 0.889 test_accuracy: 0.877\n",
            "[3,   400] train_loss: 0.075 train_accuracy: 0.883 test_accuracy: 0.872\n",
            "[3,   405] train_loss: 0.816 train_accuracy: 0.884 test_accuracy: 0.871\n",
            "[3,   410] train_loss: 0.436 train_accuracy: 0.885 test_accuracy: 0.874\n",
            "[3,   415] train_loss: 0.422 train_accuracy: 0.882 test_accuracy: 0.875\n",
            "[3,   420] train_loss: 0.286 train_accuracy: 0.881 test_accuracy: 0.870\n",
            "[3,   425] train_loss: 0.159 train_accuracy: 0.881 test_accuracy: 0.865\n",
            "[3,   430] train_loss: 0.370 train_accuracy: 0.887 test_accuracy: 0.863\n",
            "[3,   435] train_loss: 0.637 train_accuracy: 0.886 test_accuracy: 0.863\n",
            "[3,   440] train_loss: 0.239 train_accuracy: 0.892 test_accuracy: 0.869\n",
            "[3,   445] train_loss: 0.726 train_accuracy: 0.901 test_accuracy: 0.881\n",
            "[3,   450] train_loss: 0.423 train_accuracy: 0.895 test_accuracy: 0.882\n",
            "[3,   455] train_loss: 0.455 train_accuracy: 0.885 test_accuracy: 0.873\n",
            "[3,   460] train_loss: 0.330 train_accuracy: 0.885 test_accuracy: 0.874\n",
            "[3,   465] train_loss: 0.161 train_accuracy: 0.882 test_accuracy: 0.872\n",
            "[3,   470] train_loss: 0.751 train_accuracy: 0.888 test_accuracy: 0.870\n",
            "[3,   475] train_loss: 0.571 train_accuracy: 0.898 test_accuracy: 0.883\n",
            "[3,   480] train_loss: 0.488 train_accuracy: 0.898 test_accuracy: 0.882\n",
            "[3,   485] train_loss: 0.395 train_accuracy: 0.884 test_accuracy: 0.871\n",
            "[3,   490] train_loss: 0.137 train_accuracy: 0.870 test_accuracy: 0.863\n",
            "[3,   495] train_loss: 0.232 train_accuracy: 0.869 test_accuracy: 0.857\n",
            "[3,   500] train_loss: 0.243 train_accuracy: 0.885 test_accuracy: 0.858\n",
            "[3,   505] train_loss: 0.195 train_accuracy: 0.870 test_accuracy: 0.842\n",
            "[3,   510] train_loss: 0.376 train_accuracy: 0.852 test_accuracy: 0.819\n",
            "[3,   515] train_loss: 0.856 train_accuracy: 0.893 test_accuracy: 0.860\n",
            "[3,   520] train_loss: 0.945 train_accuracy: 0.872 test_accuracy: 0.848\n",
            "[3,   525] train_loss: 0.756 train_accuracy: 0.863 test_accuracy: 0.843\n",
            "[3,   530] train_loss: 0.562 train_accuracy: 0.839 test_accuracy: 0.824\n",
            "[3,   535] train_loss: 0.258 train_accuracy: 0.837 test_accuracy: 0.828\n",
            "[3,   540] train_loss: 0.419 train_accuracy: 0.850 test_accuracy: 0.842\n",
            "[3,   545] train_loss: 0.685 train_accuracy: 0.869 test_accuracy: 0.855\n",
            "[3,   550] train_loss: 0.351 train_accuracy: 0.880 test_accuracy: 0.866\n",
            "[3,   555] train_loss: 0.354 train_accuracy: 0.880 test_accuracy: 0.861\n",
            "[3,   560] train_loss: 0.399 train_accuracy: 0.878 test_accuracy: 0.859\n",
            "[3,   565] train_loss: 0.341 train_accuracy: 0.885 test_accuracy: 0.864\n",
            "[3,   570] train_loss: 0.490 train_accuracy: 0.866 test_accuracy: 0.847\n",
            "[3,   575] train_loss: 0.386 train_accuracy: 0.849 test_accuracy: 0.851\n",
            "[3,   580] train_loss: 0.616 train_accuracy: 0.865 test_accuracy: 0.853\n",
            "[3,   585] train_loss: 0.278 train_accuracy: 0.844 test_accuracy: 0.827\n",
            "[3,   590] train_loss: 0.912 train_accuracy: 0.842 test_accuracy: 0.811\n",
            "[3,   595] train_loss: 1.087 train_accuracy: 0.860 test_accuracy: 0.833\n",
            "[3,   600] train_loss: 0.116 train_accuracy: 0.865 test_accuracy: 0.848\n",
            "[3,   605] train_loss: 0.531 train_accuracy: 0.841 test_accuracy: 0.836\n",
            "[3,   610] train_loss: 0.919 train_accuracy: 0.823 test_accuracy: 0.824\n",
            "[3,   615] train_loss: 0.378 train_accuracy: 0.855 test_accuracy: 0.846\n",
            "[3,   620] train_loss: 0.471 train_accuracy: 0.863 test_accuracy: 0.846\n",
            "[3,   625] train_loss: 0.457 train_accuracy: 0.866 test_accuracy: 0.847\n",
            "[3,   630] train_loss: 0.742 train_accuracy: 0.874 test_accuracy: 0.851\n",
            "[3,   635] train_loss: 0.494 train_accuracy: 0.876 test_accuracy: 0.856\n",
            "[3,   640] train_loss: 0.312 train_accuracy: 0.881 test_accuracy: 0.866\n",
            "[3,   645] train_loss: 0.090 train_accuracy: 0.885 test_accuracy: 0.870\n",
            "[3,   650] train_loss: 0.395 train_accuracy: 0.883 test_accuracy: 0.871\n",
            "[3,   655] train_loss: 0.294 train_accuracy: 0.879 test_accuracy: 0.867\n",
            "[3,   660] train_loss: 0.797 train_accuracy: 0.884 test_accuracy: 0.871\n",
            "[3,   665] train_loss: 0.381 train_accuracy: 0.882 test_accuracy: 0.867\n",
            "[3,   670] train_loss: 0.428 train_accuracy: 0.881 test_accuracy: 0.861\n",
            "[3,   675] train_loss: 0.138 train_accuracy: 0.871 test_accuracy: 0.848\n",
            "[3,   680] train_loss: 0.342 train_accuracy: 0.877 test_accuracy: 0.853\n",
            "[3,   685] train_loss: 0.579 train_accuracy: 0.884 test_accuracy: 0.861\n",
            "[3,   690] train_loss: 0.632 train_accuracy: 0.884 test_accuracy: 0.860\n",
            "[3,   695] train_loss: 0.301 train_accuracy: 0.886 test_accuracy: 0.865\n",
            "[3,   700] train_loss: 0.593 train_accuracy: 0.896 test_accuracy: 0.879\n",
            "[3,   705] train_loss: 0.372 train_accuracy: 0.883 test_accuracy: 0.875\n",
            "[3,   710] train_loss: 0.429 train_accuracy: 0.875 test_accuracy: 0.867\n",
            "[3,   715] train_loss: 0.216 train_accuracy: 0.855 test_accuracy: 0.851\n",
            "[3,   720] train_loss: 0.698 train_accuracy: 0.882 test_accuracy: 0.867\n",
            "[3,   725] train_loss: 0.156 train_accuracy: 0.878 test_accuracy: 0.861\n",
            "[3,   730] train_loss: 0.834 train_accuracy: 0.865 test_accuracy: 0.849\n",
            "[3,   735] train_loss: 0.439 train_accuracy: 0.859 test_accuracy: 0.839\n",
            "[3,   740] train_loss: 0.401 train_accuracy: 0.860 test_accuracy: 0.843\n",
            "[3,   745] train_loss: 0.198 train_accuracy: 0.856 test_accuracy: 0.854\n",
            "[3,   750] train_loss: 0.703 train_accuracy: 0.825 test_accuracy: 0.830\n",
            "[3,   755] train_loss: 0.442 train_accuracy: 0.832 test_accuracy: 0.831\n",
            "[3,   760] train_loss: 0.729 train_accuracy: 0.870 test_accuracy: 0.854\n",
            "[3,   765] train_loss: 0.366 train_accuracy: 0.887 test_accuracy: 0.863\n",
            "[3,   770] train_loss: 0.335 train_accuracy: 0.887 test_accuracy: 0.862\n",
            "[3,   775] train_loss: 0.358 train_accuracy: 0.875 test_accuracy: 0.852\n",
            "[3,   780] train_loss: 0.415 train_accuracy: 0.874 test_accuracy: 0.855\n",
            "[3,   785] train_loss: 0.141 train_accuracy: 0.882 test_accuracy: 0.856\n",
            "[3,   790] train_loss: 0.289 train_accuracy: 0.881 test_accuracy: 0.851\n",
            "[3,   795] train_loss: 0.192 train_accuracy: 0.879 test_accuracy: 0.850\n",
            "[3,   800] train_loss: 0.296 train_accuracy: 0.871 test_accuracy: 0.854\n",
            "[3,   805] train_loss: 1.018 train_accuracy: 0.885 test_accuracy: 0.877\n",
            "[3,   810] train_loss: 0.336 train_accuracy: 0.892 test_accuracy: 0.886\n",
            "[3,   815] train_loss: 0.486 train_accuracy: 0.894 test_accuracy: 0.883\n",
            "[3,   820] train_loss: 0.449 train_accuracy: 0.883 test_accuracy: 0.871\n",
            "[3,   825] train_loss: 0.711 train_accuracy: 0.889 test_accuracy: 0.874\n",
            "[3,   830] train_loss: 0.490 train_accuracy: 0.894 test_accuracy: 0.870\n",
            "[3,   835] train_loss: 0.947 train_accuracy: 0.898 test_accuracy: 0.871\n",
            "[3,   840] train_loss: 0.356 train_accuracy: 0.887 test_accuracy: 0.863\n",
            "[3,   845] train_loss: 0.343 train_accuracy: 0.876 test_accuracy: 0.852\n",
            "[3,   850] train_loss: 0.300 train_accuracy: 0.876 test_accuracy: 0.854\n",
            "[3,   855] train_loss: 0.453 train_accuracy: 0.889 test_accuracy: 0.871\n",
            "[3,   860] train_loss: 0.304 train_accuracy: 0.888 test_accuracy: 0.874\n",
            "[3,   865] train_loss: 0.521 train_accuracy: 0.883 test_accuracy: 0.865\n",
            "[3,   870] train_loss: 0.215 train_accuracy: 0.878 test_accuracy: 0.860\n",
            "[3,   875] train_loss: 0.411 train_accuracy: 0.878 test_accuracy: 0.859\n",
            "[3,   880] train_loss: 0.405 train_accuracy: 0.876 test_accuracy: 0.854\n",
            "[3,   885] train_loss: 0.231 train_accuracy: 0.883 test_accuracy: 0.856\n",
            "[3,   890] train_loss: 0.616 train_accuracy: 0.884 test_accuracy: 0.856\n",
            "[3,   895] train_loss: 0.701 train_accuracy: 0.876 test_accuracy: 0.850\n",
            "[3,   900] train_loss: 0.594 train_accuracy: 0.862 test_accuracy: 0.841\n",
            "[3,   905] train_loss: 0.427 train_accuracy: 0.872 test_accuracy: 0.861\n",
            "[3,   910] train_loss: 0.191 train_accuracy: 0.863 test_accuracy: 0.859\n",
            "[3,   915] train_loss: 0.498 train_accuracy: 0.851 test_accuracy: 0.851\n",
            "[3,   920] train_loss: 0.726 train_accuracy: 0.870 test_accuracy: 0.862\n",
            "[3,   925] train_loss: 0.841 train_accuracy: 0.888 test_accuracy: 0.876\n",
            "[3,   930] train_loss: 0.196 train_accuracy: 0.884 test_accuracy: 0.867\n",
            "[3,   935] train_loss: 0.486 train_accuracy: 0.880 test_accuracy: 0.861\n",
            "[3,   940] train_loss: 0.402 train_accuracy: 0.882 test_accuracy: 0.862\n",
            "[3,   945] train_loss: 0.308 train_accuracy: 0.869 test_accuracy: 0.846\n",
            "[3,   950] train_loss: 0.419 train_accuracy: 0.868 test_accuracy: 0.852\n",
            "[3,   955] train_loss: 0.668 train_accuracy: 0.894 test_accuracy: 0.872\n",
            "[3,   960] train_loss: 0.153 train_accuracy: 0.887 test_accuracy: 0.866\n",
            "[3,   965] train_loss: 0.487 train_accuracy: 0.885 test_accuracy: 0.868\n",
            "[3,   970] train_loss: 0.170 train_accuracy: 0.877 test_accuracy: 0.859\n",
            "[3,   975] train_loss: 0.576 train_accuracy: 0.875 test_accuracy: 0.858\n",
            "[3,   980] train_loss: 0.675 train_accuracy: 0.891 test_accuracy: 0.875\n",
            "[3,   985] train_loss: 0.202 train_accuracy: 0.887 test_accuracy: 0.870\n",
            "[3,   990] train_loss: 0.391 train_accuracy: 0.885 test_accuracy: 0.869\n",
            "[3,   995] train_loss: 0.296 train_accuracy: 0.877 test_accuracy: 0.861\n",
            "[3,  1000] train_loss: 0.707 train_accuracy: 0.889 test_accuracy: 0.871\n",
            "[3,  1005] train_loss: 0.412 train_accuracy: 0.898 test_accuracy: 0.880\n",
            "[3,  1010] train_loss: 0.242 train_accuracy: 0.895 test_accuracy: 0.881\n",
            "[3,  1015] train_loss: 0.236 train_accuracy: 0.889 test_accuracy: 0.883\n",
            "[3,  1020] train_loss: 0.502 train_accuracy: 0.853 test_accuracy: 0.860\n",
            "[3,  1025] train_loss: 0.625 train_accuracy: 0.872 test_accuracy: 0.858\n",
            "[3,  1030] train_loss: 0.894 train_accuracy: 0.871 test_accuracy: 0.854\n",
            "[3,  1035] train_loss: 0.712 train_accuracy: 0.856 test_accuracy: 0.834\n",
            "[3,  1040] train_loss: 0.615 train_accuracy: 0.841 test_accuracy: 0.825\n",
            "[3,  1045] train_loss: 0.568 train_accuracy: 0.860 test_accuracy: 0.839\n",
            "[3,  1050] train_loss: 0.244 train_accuracy: 0.864 test_accuracy: 0.839\n",
            "[3,  1055] train_loss: 0.211 train_accuracy: 0.845 test_accuracy: 0.817\n",
            "[3,  1060] train_loss: 1.176 train_accuracy: 0.857 test_accuracy: 0.829\n",
            "[3,  1065] train_loss: 0.364 train_accuracy: 0.866 test_accuracy: 0.843\n",
            "[3,  1070] train_loss: 0.607 train_accuracy: 0.877 test_accuracy: 0.866\n",
            "[3,  1075] train_loss: 0.327 train_accuracy: 0.877 test_accuracy: 0.878\n",
            "[3,  1080] train_loss: 0.289 train_accuracy: 0.881 test_accuracy: 0.888\n",
            "[3,  1085] train_loss: 0.659 train_accuracy: 0.885 test_accuracy: 0.894\n",
            "[3,  1090] train_loss: 0.634 train_accuracy: 0.894 test_accuracy: 0.890\n",
            "[3,  1095] train_loss: 0.640 train_accuracy: 0.881 test_accuracy: 0.868\n",
            "[3,  1100] train_loss: 0.581 train_accuracy: 0.874 test_accuracy: 0.860\n",
            "[3,  1105] train_loss: 0.192 train_accuracy: 0.889 test_accuracy: 0.870\n",
            "[3,  1110] train_loss: 0.715 train_accuracy: 0.893 test_accuracy: 0.876\n",
            "[3,  1115] train_loss: 0.258 train_accuracy: 0.879 test_accuracy: 0.866\n",
            "[3,  1120] train_loss: 0.115 train_accuracy: 0.884 test_accuracy: 0.870\n",
            "[3,  1125] train_loss: 0.390 train_accuracy: 0.882 test_accuracy: 0.863\n",
            "[3,  1130] train_loss: 0.187 train_accuracy: 0.875 test_accuracy: 0.856\n",
            "[3,  1135] train_loss: 0.235 train_accuracy: 0.875 test_accuracy: 0.859\n",
            "[3,  1140] train_loss: 0.875 train_accuracy: 0.878 test_accuracy: 0.859\n",
            "[3,  1145] train_loss: 0.218 train_accuracy: 0.860 test_accuracy: 0.834\n",
            "[3,  1150] train_loss: 0.847 train_accuracy: 0.881 test_accuracy: 0.860\n",
            "[3,  1155] train_loss: 1.163 train_accuracy: 0.893 test_accuracy: 0.882\n",
            "[3,  1160] train_loss: 0.360 train_accuracy: 0.847 test_accuracy: 0.835\n",
            "[3,  1165] train_loss: 0.469 train_accuracy: 0.798 test_accuracy: 0.773\n",
            "[3,  1170] train_loss: 0.663 train_accuracy: 0.798 test_accuracy: 0.784\n",
            "[3,  1175] train_loss: 1.057 train_accuracy: 0.822 test_accuracy: 0.814\n",
            "[3,  1180] train_loss: 0.801 train_accuracy: 0.867 test_accuracy: 0.854\n",
            "[3,  1185] train_loss: 0.325 train_accuracy: 0.867 test_accuracy: 0.861\n",
            "[3,  1190] train_loss: 0.878 train_accuracy: 0.862 test_accuracy: 0.859\n",
            "[3,  1195] train_loss: 0.325 train_accuracy: 0.851 test_accuracy: 0.854\n",
            "[3,  1200] train_loss: 0.964 train_accuracy: 0.849 test_accuracy: 0.853\n",
            "[3,  1205] train_loss: 0.179 train_accuracy: 0.862 test_accuracy: 0.850\n",
            "[3,  1210] train_loss: 0.456 train_accuracy: 0.874 test_accuracy: 0.851\n",
            "[3,  1215] train_loss: 0.171 train_accuracy: 0.873 test_accuracy: 0.858\n",
            "[3,  1220] train_loss: 0.374 train_accuracy: 0.872 test_accuracy: 0.857\n",
            "[3,  1225] train_loss: 0.253 train_accuracy: 0.875 test_accuracy: 0.861\n",
            "[3,  1230] train_loss: 0.486 train_accuracy: 0.884 test_accuracy: 0.867\n",
            "[3,  1235] train_loss: 0.624 train_accuracy: 0.859 test_accuracy: 0.848\n",
            "[3,  1240] train_loss: 0.148 train_accuracy: 0.847 test_accuracy: 0.840\n",
            "[3,  1245] train_loss: 0.411 train_accuracy: 0.861 test_accuracy: 0.852\n",
            "[3,  1250] train_loss: 0.374 train_accuracy: 0.888 test_accuracy: 0.870\n",
            "[3,  1255] train_loss: 0.317 train_accuracy: 0.896 test_accuracy: 0.873\n",
            "[3,  1260] train_loss: 0.356 train_accuracy: 0.905 test_accuracy: 0.883\n",
            "[3,  1265] train_loss: 0.298 train_accuracy: 0.905 test_accuracy: 0.881\n",
            "[3,  1270] train_loss: 0.258 train_accuracy: 0.896 test_accuracy: 0.876\n",
            "[3,  1275] train_loss: 0.067 train_accuracy: 0.882 test_accuracy: 0.866\n",
            "[3,  1280] train_loss: 0.281 train_accuracy: 0.872 test_accuracy: 0.855\n",
            "[3,  1285] train_loss: 0.396 train_accuracy: 0.890 test_accuracy: 0.864\n",
            "[3,  1290] train_loss: 0.643 train_accuracy: 0.900 test_accuracy: 0.872\n",
            "[3,  1295] train_loss: 0.406 train_accuracy: 0.885 test_accuracy: 0.852\n",
            "[3,  1300] train_loss: 0.236 train_accuracy: 0.876 test_accuracy: 0.844\n",
            "[3,  1305] train_loss: 0.261 train_accuracy: 0.869 test_accuracy: 0.844\n",
            "[3,  1310] train_loss: 0.371 train_accuracy: 0.881 test_accuracy: 0.859\n",
            "[3,  1315] train_loss: 0.228 train_accuracy: 0.888 test_accuracy: 0.868\n",
            "[3,  1320] train_loss: 0.562 train_accuracy: 0.897 test_accuracy: 0.882\n",
            "[3,  1325] train_loss: 0.478 train_accuracy: 0.904 test_accuracy: 0.883\n",
            "[3,  1330] train_loss: 0.542 train_accuracy: 0.897 test_accuracy: 0.878\n",
            "[3,  1335] train_loss: 0.101 train_accuracy: 0.891 test_accuracy: 0.875\n",
            "[3,  1340] train_loss: 0.397 train_accuracy: 0.899 test_accuracy: 0.880\n",
            "[3,  1345] train_loss: 0.225 train_accuracy: 0.902 test_accuracy: 0.878\n",
            "[3,  1350] train_loss: 0.503 train_accuracy: 0.884 test_accuracy: 0.861\n",
            "[3,  1355] train_loss: 0.502 train_accuracy: 0.890 test_accuracy: 0.868\n",
            "[3,  1360] train_loss: 0.423 train_accuracy: 0.904 test_accuracy: 0.887\n",
            "[3,  1365] train_loss: 0.344 train_accuracy: 0.904 test_accuracy: 0.886\n",
            "[3,  1370] train_loss: 1.186 train_accuracy: 0.903 test_accuracy: 0.892\n",
            "[3,  1375] train_loss: 0.604 train_accuracy: 0.882 test_accuracy: 0.886\n",
            "[3,  1380] train_loss: 0.328 train_accuracy: 0.857 test_accuracy: 0.867\n",
            "[3,  1385] train_loss: 0.471 train_accuracy: 0.844 test_accuracy: 0.857\n",
            "[3,  1390] train_loss: 0.655 train_accuracy: 0.856 test_accuracy: 0.865\n",
            "[3,  1395] train_loss: 0.969 train_accuracy: 0.877 test_accuracy: 0.864\n",
            "[3,  1400] train_loss: 0.368 train_accuracy: 0.892 test_accuracy: 0.874\n",
            "[3,  1405] train_loss: 0.463 train_accuracy: 0.887 test_accuracy: 0.879\n",
            "[3,  1410] train_loss: 0.314 train_accuracy: 0.872 test_accuracy: 0.875\n",
            "[3,  1415] train_loss: 0.321 train_accuracy: 0.897 test_accuracy: 0.874\n",
            "[3,  1420] train_loss: 0.588 train_accuracy: 0.868 test_accuracy: 0.839\n",
            "[3,  1425] train_loss: 0.374 train_accuracy: 0.836 test_accuracy: 0.819\n",
            "[3,  1430] train_loss: 0.226 train_accuracy: 0.816 test_accuracy: 0.810\n",
            "[3,  1435] train_loss: 0.430 train_accuracy: 0.833 test_accuracy: 0.830\n",
            "[3,  1440] train_loss: 0.345 train_accuracy: 0.871 test_accuracy: 0.862\n",
            "[3,  1445] train_loss: 0.546 train_accuracy: 0.894 test_accuracy: 0.880\n",
            "[3,  1450] train_loss: 0.081 train_accuracy: 0.883 test_accuracy: 0.869\n",
            "[3,  1455] train_loss: 0.591 train_accuracy: 0.871 test_accuracy: 0.861\n",
            "[3,  1460] train_loss: 0.191 train_accuracy: 0.881 test_accuracy: 0.871\n",
            "[3,  1465] train_loss: 0.439 train_accuracy: 0.901 test_accuracy: 0.886\n",
            "[3,  1470] train_loss: 0.251 train_accuracy: 0.904 test_accuracy: 0.882\n",
            "[3,  1475] train_loss: 0.077 train_accuracy: 0.899 test_accuracy: 0.873\n",
            "[3,  1480] train_loss: 0.248 train_accuracy: 0.895 test_accuracy: 0.871\n",
            "[3,  1485] train_loss: 0.086 train_accuracy: 0.895 test_accuracy: 0.872\n",
            "[3,  1490] train_loss: 0.382 train_accuracy: 0.899 test_accuracy: 0.877\n",
            "[3,  1495] train_loss: 0.367 train_accuracy: 0.902 test_accuracy: 0.883\n",
            "[3,  1500] train_loss: 0.378 train_accuracy: 0.888 test_accuracy: 0.883\n",
            "[3,  1505] train_loss: 0.139 train_accuracy: 0.890 test_accuracy: 0.887\n",
            "[3,  1510] train_loss: 0.514 train_accuracy: 0.906 test_accuracy: 0.893\n",
            "[3,  1515] train_loss: 0.371 train_accuracy: 0.894 test_accuracy: 0.878\n",
            "[3,  1520] train_loss: 0.155 train_accuracy: 0.864 test_accuracy: 0.849\n",
            "[3,  1525] train_loss: 0.323 train_accuracy: 0.860 test_accuracy: 0.846\n",
            "[3,  1530] train_loss: 0.532 train_accuracy: 0.873 test_accuracy: 0.865\n",
            "[3,  1535] train_loss: 0.419 train_accuracy: 0.845 test_accuracy: 0.840\n",
            "[3,  1540] train_loss: 0.715 train_accuracy: 0.835 test_accuracy: 0.825\n",
            "[3,  1545] train_loss: 0.848 train_accuracy: 0.849 test_accuracy: 0.840\n",
            "[3,  1550] train_loss: 0.518 train_accuracy: 0.834 test_accuracy: 0.834\n",
            "[3,  1555] train_loss: 0.399 train_accuracy: 0.831 test_accuracy: 0.829\n",
            "[3,  1560] train_loss: 0.743 train_accuracy: 0.826 test_accuracy: 0.834\n",
            "[3,  1565] train_loss: 0.533 train_accuracy: 0.853 test_accuracy: 0.845\n",
            "[3,  1570] train_loss: 0.267 train_accuracy: 0.859 test_accuracy: 0.853\n",
            "[3,  1575] train_loss: 0.361 train_accuracy: 0.874 test_accuracy: 0.862\n",
            "[3,  1580] train_loss: 0.502 train_accuracy: 0.876 test_accuracy: 0.875\n",
            "[3,  1585] train_loss: 0.442 train_accuracy: 0.881 test_accuracy: 0.878\n",
            "[3,  1590] train_loss: 0.506 train_accuracy: 0.891 test_accuracy: 0.882\n",
            "[3,  1595] train_loss: 0.687 train_accuracy: 0.896 test_accuracy: 0.878\n",
            "[3,  1600] train_loss: 0.288 train_accuracy: 0.886 test_accuracy: 0.866\n",
            "[3,  1605] train_loss: 0.250 train_accuracy: 0.874 test_accuracy: 0.853\n",
            "[3,  1610] train_loss: 0.783 train_accuracy: 0.880 test_accuracy: 0.855\n",
            "[3,  1615] train_loss: 0.285 train_accuracy: 0.888 test_accuracy: 0.856\n",
            "[3,  1620] train_loss: 0.630 train_accuracy: 0.890 test_accuracy: 0.857\n",
            "[3,  1625] train_loss: 0.631 train_accuracy: 0.890 test_accuracy: 0.857\n",
            "[3,  1630] train_loss: 0.350 train_accuracy: 0.900 test_accuracy: 0.869\n",
            "[3,  1635] train_loss: 0.503 train_accuracy: 0.890 test_accuracy: 0.863\n",
            "[3,  1640] train_loss: 1.255 train_accuracy: 0.892 test_accuracy: 0.867\n",
            "[3,  1645] train_loss: 0.501 train_accuracy: 0.892 test_accuracy: 0.874\n",
            "[3,  1650] train_loss: 0.220 train_accuracy: 0.897 test_accuracy: 0.884\n",
            "[3,  1655] train_loss: 0.265 train_accuracy: 0.897 test_accuracy: 0.886\n",
            "[3,  1660] train_loss: 0.229 train_accuracy: 0.894 test_accuracy: 0.886\n",
            "[3,  1665] train_loss: 0.536 train_accuracy: 0.892 test_accuracy: 0.877\n",
            "[3,  1670] train_loss: 0.239 train_accuracy: 0.874 test_accuracy: 0.861\n",
            "[3,  1675] train_loss: 0.680 train_accuracy: 0.874 test_accuracy: 0.859\n",
            "[3,  1680] train_loss: 0.587 train_accuracy: 0.890 test_accuracy: 0.876\n",
            "[3,  1685] train_loss: 0.256 train_accuracy: 0.884 test_accuracy: 0.867\n",
            "[3,  1690] train_loss: 0.329 train_accuracy: 0.878 test_accuracy: 0.867\n",
            "[3,  1695] train_loss: 0.424 train_accuracy: 0.889 test_accuracy: 0.875\n",
            "[3,  1700] train_loss: 0.952 train_accuracy: 0.894 test_accuracy: 0.875\n",
            "[3,  1705] train_loss: 0.448 train_accuracy: 0.872 test_accuracy: 0.855\n",
            "[3,  1710] train_loss: 1.024 train_accuracy: 0.885 test_accuracy: 0.868\n",
            "[3,  1715] train_loss: 0.124 train_accuracy: 0.906 test_accuracy: 0.887\n",
            "[3,  1720] train_loss: 0.284 train_accuracy: 0.909 test_accuracy: 0.893\n",
            "[3,  1725] train_loss: 0.600 train_accuracy: 0.910 test_accuracy: 0.887\n"
          ]
        }
      ],
      "source": [
        "import numpy as np\n",
        "import torch\n",
        "import torchvision\n",
        "import torch.nn as nn\n",
        "from matplotlib import pyplot as plt\n",
        "import random\n",
        "import torch.optim as optim\n",
        "import torchvision.transforms as transforms\n",
        "import time\n",
        "import torch\n",
        "import torch.nn as nn\n",
        "import torch.nn.functional as F\n",
        "from collections import Counter, defaultdict\n",
        "from itertools import combinations\n",
        "import random\n",
        "\n",
        "# Training loop with the Generalization Decision Process (GDP).\n",
        "# Uses names defined in earlier cells: net, loss_function, optimizer_L,\n",
        "# e1234_loader, e3412_loader, the e12_*/e34_* probe tensors and the\n",
        "# s_tra_*/s_test_* evaluation tensors.\n",
        "\n",
        "NUM_EPOCHS = 5  # Number of passes over e1234_loader\n",
        "LOG_EVERY = 5  # Evaluate and print every LOG_EVERY mini-batches\n",
        "NUM_CHUNKS = 10  # Number of chunks the e12_s / e34_s predictions are split into\n",
        "\n",
        "a_train = []  # To store training accuracy\n",
        "a_test = []  # To store test accuracy\n",
        "lossaaa = []  # To store loss values\n",
        "Inf = []  # To store information values\n",
        "Var_all = []  # To store all variation values\n",
        "Generalization_Ratio_ = []  # To store generalization ratios\n",
        "dicide_action = []  # To store decision actions (not written to in this cell)\n",
        "loss_before = torch.tensor(30.0)  # Initial loss value\n",
        "los = torch.tensor(30.0)  # Initial loss difference\n",
        "dis_before_A1 = torch.tensor(0)  # Score of action A1\n",
        "dis_before_A2 = torch.tensor(0)  # Score of action A2\n",
        "dis = torch.tensor(1)  # Score increment\n",
        "per = \"N\"  # Previous iteration's action (\"N\" = none yet)\n",
        "state_before = torch.tensor(0.0)  # Previous state value\n",
        "categrary_number = 10  # Number of categories\n",
        "tra_val_number = 2  # Training validation number\n",
        "weight_val_probility = 1.0 / tra_val_number  # Weight for validation probability (not read in this cell)\n",
        "\n",
        "all_combinations = list(combinations(range(categrary_number), 2))  # Unordered class pairs (i < j)\n",
        "K = categrary_number * (categrary_number - 1)  # Number of ordered class pairs\n",
        "\n",
        "\n",
        "def _class_counts(predictions, num_classes):\n",
        "    \"\"\"Return how often each class index occurs in predictions.\n",
        "\n",
        "    The result is a CPU tensor of length num_classes in the default float\n",
        "    dtype, filled with one copy instead of one device transfer per class.\n",
        "    \"\"\"\n",
        "    counts = torch.bincount(predictions, minlength=num_classes)[:num_classes]\n",
        "    return torch.zeros(num_classes).copy_(counts)\n",
        "\n",
        "\n",
        "def _class_distribution(predictions, num_classes):\n",
        "    \"\"\"Return the (1, num_classes) relative frequency of each predicted class.\"\"\"\n",
        "    total = len(predictions)\n",
        "    # The denominator is total * (1 + 1e-6), written as a sum to keep the arithmetic unchanged\n",
        "    return (_class_counts(predictions, num_classes) / (total + total * 1e-6)).unsqueeze(0)\n",
        "\n",
        "\n",
        "def _kl_term(p, q):\n",
        "    \"\"\"Element-wise p * log(p / q); the 1e-30 offsets avoid log(0) and division by zero.\"\"\"\n",
        "    return (p + 1e-30) * torch.log(p / (q + 1e-30) + 1e-30)\n",
        "\n",
        "\n",
        "def _variation(in_12, in_34, extra_12, extra_34):\n",
        "    \"\"\"Return the Variation value of one step.\n",
        "\n",
        "    The four (1, num_classes) distributions are normalised per class by their\n",
        "    weighted sum; the result is the larger of the two maximum absolute KL terms\n",
        "    between the e12 'in' share and each of the two 'extracted' shares.\n",
        "    \"\"\"\n",
        "    weight = 1.0 / 2\n",
        "    dow_all = in_12 * weight + in_34 * weight + extra_12 * weight + extra_34 * weight + 1e-30\n",
        "    in_1 = ((in_12 * weight) / dow_all)[0]\n",
        "    e_1 = ((extra_12 * weight) / dow_all)[0]\n",
        "    e_2 = ((extra_34 * weight) / dow_all)[0]\n",
        "    # NOTE(review): the e34 'in' share only enters the normaliser; both KL terms\n",
        "    # use the e12 'in' share. Confirm that this asymmetry is intended.\n",
        "    d_kl_1 = torch.max(abs(_kl_term(in_1, e_1)))\n",
        "    d_kl_2 = torch.max(abs(_kl_term(in_1, e_2)))\n",
        "    return torch.max(d_kl_1, d_kl_2)\n",
        "\n",
        "\n",
        "def _information(counts_a, counts_b, class_pairs, num_chunks, num_ordered_pairs):\n",
        "    \"\"\"Return the Information value of one step.\n",
        "\n",
        "    counts_a and counts_b are lists of per-chunk class-count vectors. For every\n",
        "    ordered class pair the smaller of the two absolute KL terms (one per list)\n",
        "    is stored; the stored terms are summed and divided by num_ordered_pairs.\n",
        "    \"\"\"\n",
        "    num_pairs = len(class_pairs)\n",
        "    terms = torch.zeros(num_pairs * 2)\n",
        "    # NOTE(review): terms is overwritten on every pass over the chunks, so only the\n",
        "    # last chunk contributes to the result. Confirm whether accumulating over\n",
        "    # chunks was intended before changing this.\n",
        "    for c in range(num_chunks):\n",
        "        # Counts are scaled by a fixed 10 - presumably the samples per chunk; TODO confirm\n",
        "        a = counts_a[c] / 10\n",
        "        b = counts_b[c] / 10\n",
        "        for idx, (i, j) in enumerate(class_pairs):\n",
        "            forward = torch.min(abs(_kl_term(a[i], a[j])), abs(_kl_term(b[i], b[j])))\n",
        "            backward = torch.min(abs(_kl_term(a[j], a[i])), abs(_kl_term(b[j], b[i])))\n",
        "            terms[idx] = forward.item()\n",
        "            terms[idx + num_pairs] = backward.item()\n",
        "    return torch.sum(terms) / num_ordered_pairs\n",
        "\n",
        "\n",
        "def _gdp_choose_loss(candidate_losses, state_dis, los, per, score_a1, score_a2, dis):\n",
        "    \"\"\"Pick the loss to optimise in this step and update the action scores.\n",
        "\n",
        "    candidate_losses holds the two losses, index 0 for action A1 (e1234) and\n",
        "    index 1 for action A2 (e3412).\n",
        "\n",
        "    When a previous action exists, its score is raised (and the other score\n",
        "    lowered) if 'state_dis >= 0' and 'los > 0' agree, and lowered otherwise.\n",
        "    The step is dis when los > 0 ('not fitting') and 2 * dis otherwise\n",
        "    ('overfitting'). These updates modify score_a1 / score_a2 in place.\n",
        "\n",
        "    The action with the higher score is favoured (A1 on ties and on the very\n",
        "    first step): its loss is drawn with weight 3 against 1, the scores are\n",
        "    damped by 3/4 (favoured) and 1/4 (other), and the favoured action becomes\n",
        "    the new previous action regardless of which loss the draw returned.\n",
        "\n",
        "    Returns (loss, per, score_a1, score_a2).\n",
        "    \"\"\"\n",
        "    not_fitting = bool(los > 0.0)\n",
        "    if per in (\"A1\", \"A2\"):\n",
        "        step = dis if not_fitting else dis * 2\n",
        "        previous_rewarded = bool(state_dis >= 0.0) == not_fitting\n",
        "        if previous_rewarded == (per == \"A1\"):\n",
        "            score_a1 += step\n",
        "            score_a2 -= step\n",
        "        else:\n",
        "            score_a1 -= step\n",
        "            score_a2 += step\n",
        "        favour_a1 = bool(score_a1 >= score_a2)\n",
        "    else:\n",
        "        favour_a1 = True\n",
        "\n",
        "    if favour_a1:\n",
        "        weights, keep_a1, keep_a2, action = [3, 1], 3 / 4, 1 / 4, \"A1\"\n",
        "    else:\n",
        "        weights, keep_a1, keep_a2, action = [1, 3], 1 / 4, 3 / 4, \"A2\"\n",
        "    loss = random.choices(candidate_losses, weights=weights)[0]\n",
        "    return loss, action, score_a1 * keep_a1, score_a2 * keep_a2\n",
        "\n",
        "\n",
        "# Device transfers that do not depend on the step are done once up front\n",
        "e12_extracted_loader_image = e12_extracted_loader_image.cuda()\n",
        "e12_extracted_loader_label = e12_extracted_loader_label.cuda()\n",
        "e34_extracted_loader_image = e34_extracted_loader_image.cuda()\n",
        "e34_extracted_loader_label = e34_extracted_loader_label.cuda()\n",
        "e12_s_loader_image = e12_s_loader_image.cuda()\n",
        "e12_s_loader_label = e12_s_loader_label.cuda()\n",
        "e34_s_loader_image = e34_s_loader_image.cuda()\n",
        "e34_s_loader_label = e34_s_loader_label.cuda()\n",
        "s_test_image = s_test_image.cuda()\n",
        "s_test_label = s_test_label.cuda()\n",
        "s_tra_image = s_tra_image.cuda()\n",
        "s_tra_label = s_tra_label.cuda()\n",
        "\n",
        "for epoch in range(NUM_EPOCHS):  # Loop over the dataset multiple times\n",
        "    # Assumes e3412_loader yields at least as many batches as e1234_loader;\n",
        "    # otherwise next(e3412_iter) below raises StopIteration.\n",
        "    e3412_iter = iter(e3412_loader)\n",
        "    running_loss = 0.0  # Sum of the chosen losses since the last log line\n",
        "    running_loss_all = 0.0  # Not read in this cell\n",
        "    acc_A1 = dis_before_A1.detach()  # Not read in this cell\n",
        "    acc_A2 = dis_before_A2.detach()  # Not read in this cell\n",
        "    dis_ = dis.detach()  # Not read in this cell\n",
        "\n",
        "    for step, (imgs, labels) in enumerate(e1234_loader):  # Iterate over the data\n",
        "        ### calculate losses\n",
        "        labels = labels.cuda()\n",
        "        imgs = imgs.cuda()\n",
        "        out_e1234 = net(imgs)\n",
        "        loss_out_e1234 = loss_function(out_e1234, labels)  # Loss for e1234 (action A1)\n",
        "\n",
        "        e3412_imgs, e3412_labels = next(e3412_iter)\n",
        "        e3412_imgs = e3412_imgs.cuda()\n",
        "        e3412_labels = e3412_labels.cuda()\n",
        "        out_e3412 = net(e3412_imgs)\n",
        "        loss_out_e3412 = loss_function(out_e3412, e3412_labels)  # Loss for e3412 (action A2)\n",
        "\n",
        "        # Predicted classes on the probe sets; no graph is needed for these passes\n",
        "        with torch.no_grad():\n",
        "            e12_extracted = torch.max(net(e12_extracted_loader_image), dim=1)[1]\n",
        "            e34_extracted = torch.max(net(e34_extracted_loader_image), dim=1)[1]\n",
        "            e12_inform = torch.max(net(e12_s_loader_image), dim=1)[1]\n",
        "            e34_inform = torch.max(net(e34_s_loader_image), dim=1)[1]\n",
        "\n",
        "        # Per-chunk class counts of the e12 / e34 sample predictions\n",
        "        split_e12_inform = [_class_counts(chunk, categrary_number)\n",
        "                            for chunk in torch.chunk(e12_inform, NUM_CHUNKS)]\n",
        "        split_e34_inform = [_class_counts(chunk, categrary_number)\n",
        "                            for chunk in torch.chunk(e34_inform, NUM_CHUNKS)]\n",
        "\n",
        "        ############################### Variation x ###################################\n",
        "        Variation_all = _variation(\n",
        "            _class_distribution(e12_inform, categrary_number),\n",
        "            _class_distribution(e34_inform, categrary_number),\n",
        "            _class_distribution(e12_extracted, categrary_number),\n",
        "            _class_distribution(e34_extracted, categrary_number),\n",
        "        )\n",
        "        Var_all.append(Variation_all)\n",
        "\n",
        "        ############################ Information ###################################\n",
        "        Information = _information(split_e12_inform, split_e34_inform, all_combinations, NUM_CHUNKS, K)\n",
        "        Inf.append(Information)\n",
        "\n",
        "        ############################ Generalization_Ratio ###################################\n",
        "        # NOTE(review): Information == 0 makes this a division by zero (inf / nan result)\n",
        "        Generalization_Ratio = Variation_all * (Information + 1.0) / Information\n",
        "        Generalization_Ratio_.append(Generalization_Ratio)\n",
        "\n",
        "        ############################ Generalization Decision Process (GDP) ###################################\n",
        "        state_now = Generalization_Ratio\n",
        "        loss_before = loss_before.cuda()\n",
        "        state_before = state_before.cuda()\n",
        "        state_dis = state_now - state_before\n",
        "        candidate_losses = torch.cat((loss_out_e1234.unsqueeze(0), loss_out_e3412.unsqueeze(0)), 0)\n",
        "        loss, per, dis_before_A1, dis_before_A2 = _gdp_choose_loss(\n",
        "            candidate_losses, state_dis, los, per, dis_before_A1, dis_before_A2, dis)\n",
        "\n",
        "        ####################################\n",
        "        #### optimizer\n",
        "        # Only the value of the loss is carried to the next step, so the autograd\n",
        "        # graph of this step is not kept alive through loss_before / los.\n",
        "        loss_value = loss.detach()\n",
        "        los = loss_value - loss_before\n",
        "        state_before = state_now\n",
        "        optimizer_L.zero_grad()\n",
        "        loss.backward()\n",
        "        optimizer_L.step()\n",
        "        loss_before = loss_value\n",
        "        running_loss += loss.item()\n",
        "\n",
        "        # Print statistics\n",
        "        if step % LOG_EVERY == LOG_EVERY - 1:  # Every LOG_EVERY mini-batches\n",
        "            # NOTE(review): net is evaluated in whatever mode it is currently in;\n",
        "            # no net.eval() call is visible in this cell.\n",
        "            with torch.no_grad():\n",
        "                outputs = net(s_test_image)  # [batch, 10]\n",
        "                predict_y = torch.max(outputs, dim=1)[1]\n",
        "                accuracy = torch.eq(predict_y, s_test_label).sum().item() / s_test_label.size(0)\n",
        "                a_test.append(float(accuracy))\n",
        "                outputs_t = net(s_tra_image)  # [batch, 10]\n",
        "                predict_y_t = torch.max(outputs_t, dim=1)[1]\n",
        "                accuracy_t = torch.eq(predict_y_t, s_tra_label).sum().item() / s_tra_label.size(0)\n",
        "                a_train.append(float(accuracy_t))\n",
        "                lossaaa.append(float(running_loss / LOG_EVERY))\n",
        "                print('[%d, %5d] train_loss: %.3f train_accuracy: %.3f test_accuracy: %.3f' %\n",
        "                      (epoch + 1, step + 1, running_loss / LOG_EVERY, accuracy_t, accuracy))\n",
        "                running_loss = 0.0"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "KuXsls8IVH--"
      },
      "outputs": [],
      "source": [
        "# Write each metric series to its own file as a single ', '-separated line\n",
        "# (no trailing separator, no trailing newline).\n",
        "for path, values in (\n",
        "    ('Accuracy_a_train.txt', a_train),  # Training accuracy\n",
        "    ('Accuracy_a_test.txt', a_test),  # Test accuracy\n",
        "    ('Accuracy_loss.txt', lossaaa),  # Loss values\n",
        "):\n",
        "    with open(path, 'w') as file:\n",
        "        file.write(', '.join(str(value) for value in values))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ui3gzl39VL11"
      },
      "outputs": [],
      "source": [
        "# Convert the per-step tensors to plain Python numbers\n",
        "Inf_list = [tensor.item() for tensor in Inf]\n",
        "Var_av_list = [tensor.item() for tensor in Var_all]\n",
        "Generalization_Ratio_list = [tensor.item() for tensor in Generalization_Ratio_]\n",
        "\n",
        "# Write each series to its own file as a single ', '-separated line\n",
        "# (no trailing separator, no trailing newline).\n",
        "for path, values in (\n",
        "    ('Inf_OOD.txt', Inf_list),\n",
        "    ('Var_all_OOD.txt', Var_av_list),\n",
        "    ('Generalization_Ratio_list.txt', Generalization_Ratio_list),\n",
        "):\n",
        "    with open(path, 'w') as file:\n",
        "        file.write(', '.join(str(value) for value in values))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "b-ptmeTpVPL9"
      },
      "outputs": [],
      "source": [
        "import time\n",
        "import os  # Import the os module\n",
        "\n",
        "os.makedirs(os.path.join('./Models/'), exist_ok=True)  # Create the directory './Models/' if it doesn't exist\n",
        "model_path = './Models/'  # Define the model path\n",
        "\n",
        "rq = time.strftime('%Y%m%d%H%M', time.localtime(time.time()))  # Get the current time in the format 'YYYYMMDDHHMM'\n",
        "\n",
        "# Save the training results\n",
        "current_model_path = model_path + rq + \"_model.pkl\"  # Create the full path for the model file with the current timestamp\n",
        "torch.save(net, current_model_path)  # Save the model to the specified path\n",
        "print(\"Saved model file: \" + current_model_path)  # Print the path of the saved model file"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "8Hanfqb2VR2K",
        "outputId": "3a46c8cb-9118-46c3-a2a6-369bd0af5c62"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "train: 0.910243350311262\n",
            "test: 0.8989807474518686\n",
            "loss: 0.06705401530489326\n"
          ]
        }
      ],
      "source": [
        "# Display the highest value that appears during training\n",
        "print(\"train:\", max(a_train))  # Print the highest value in the training data\n",
        "print(\"test:\", max(a_test))  # Print the highest value in the test data\n",
        "print(\"loss:\", min(lossaaa))  # Print the minimum value of the loss"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "D8qdXOd2VV4K",
        "outputId": "4afa1a08-d81f-4f14-b9e4-02106e701ea9"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "train: 0.9132502831257078\n",
            "test: 0.8891\n"
          ]
        }
      ],
      "source": [
        "# Load training and testing datasets from specified directories and apply transformations\n",
        "trainset = datasets.ImageFolder(root='/content/colorized-MNIST/training', transform=transform)\n",
        "testset = datasets.ImageFolder(root='/content/colorized-MNIST/testing', transform=transform)\n",
        "# Create DataLoader for training and testing datasets with specified batch sizes and other parameters\n",
        "trainloader = torch.utils.data.DataLoader(trainset, batch_size=8830, shuffle=True, num_workers=0)\n",
        "t_loader = torch.utils.data.DataLoader(testset, batch_size=10000, shuffle=False, num_workers=0)\n",
        "# Get an iterator for the training DataLoader\n",
        "trainloader_iter = iter(trainloader)\n",
        "# Get the next batch of images and labels from the training DataLoader\n",
        "tl_image, tl_label = next(trainloader_iter)\n",
        "# Move the training images and labels to the GPU\n",
        "tl_image = tl_image.cuda()\n",
        "tl_label = tl_label.cuda()\n",
        "# Pass the training images through the neural network to get the outputs\n",
        "tl_imageoutputs = net(tl_image)  # [batch, 10]\n",
        "# Get the predicted labels by finding the index of the maximum value in the output tensor\n",
        "predict_y = torch.max(tl_imageoutputs, dim=1)[1]\n",
        "# Calculate the accuracy of the predictions compared to the true labels\n",
        "accuracy = torch.eq(predict_y, tl_label).sum().item() / tl_label.size(0)\n",
        "# Print the training accuracy\n",
        "print(\"train:\", float(accuracy))\n",
        "# Get an iterator for the testing DataLoader\n",
        "t_data_iter = iter(t_loader)\n",
        "# Get the next batch of images and labels from the testing DataLoader\n",
        "t_image, t_label = next(t_data_iter)\n",
        "# Move the testing images and labels to the GPU\n",
        "t_image = t_image.cuda()\n",
        "t_label = t_label.cuda()\n",
        "# Pass the testing images through the neural network to get the outputs\n",
        "t_imageoutputs = net(t_image)  # [batch, 10]\n",
        "# Get the predicted labels by finding the index of the maximum value in the output tensor\n",
        "predict = torch.max(t_imageoutputs, dim=1)[1]\n",
        "# Calculate the accuracy of the predictions compared to the true labels\n",
        "accuracy_t = torch.eq(predict, t_label).sum().item() / t_label.size(0)\n",
        "# Print the testing accuracy\n",
        "print(\"test:\", float(accuracy_t))"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "L4",
      "machine_shape": "hm",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}