{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c1b4f720",
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "seed = 1\n",
    "sparsity = 0.8\n",
    "width = 32"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6edfc04b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fda18cc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import os\n",
    "import torchvision.transforms as transforms\n",
    "import torchvision.datasets as datasets\n",
    "import time\n",
    "import copy\n",
    "import sys\n",
    "\n",
    "import random\n",
    "import numpy as np\n",
    "import torch\n",
    "from sklearn.decomposition import PCA\n",
    "import matplotlib.pyplot as plt\n",
    "import scipy.stats as ss\n",
    "from timm.data import Mixup\n",
    "from timm.loss import SoftTargetCrossEntropy\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.init as init\n",
    "import torch.nn.functional as F\n",
    "from torch.autograd import Variable\n",
    "\n",
    "import sys\n",
    "import numpy as np\n",
    "import torch.nn.utils.prune as prune\n",
    "from datautils import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c9b3aabc",
   "metadata": {},
   "outputs": [],
   "source": [
    "def random_seed(seed=42, rank=0):\n",
    "    torch.manual_seed(seed + rank)\n",
    "    np.random.seed(seed + rank)\n",
    "    random.seed(seed + rank)\n",
    "\n",
    "random_seed(seed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2348c12a",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_loader, val_loader = get_loaders(\n",
    "    \"imagenet\", path=\"\",\n",
    "    batchsize=256, workers=8,\n",
    "    nsamples=-1, seed=0,\n",
    "    noaug=False\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c50e599a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def train(train_loader, model, criterion, optimizer, scaler, epoch):\n",
    "    batch_time = AverageMeter('Time', ':6.3f')\n",
    "    data_time = AverageMeter('Data', ':6.3f')\n",
    "    losses = AverageMeter('Loss', ':.4e')\n",
    "    #top1 = AverageMeter('Acc@1', ':6.2f')\n",
    "    #top5 = AverageMeter('Acc@5', ':6.2f')\n",
    "    progress = ProgressMeter(\n",
    "        len(train_loader),\n",
    "        [batch_time, data_time, losses],\n",
    "        prefix=\"Epoch: [{}]\".format(epoch))\n",
    "\n",
    "    # switch to train mode\n",
    "    model.train()\n",
    "\n",
    "    end = time.time()\n",
    "    for i, (images, target) in enumerate(train_loader):\n",
    "        # measure data loading time\n",
    "        data_time.update(time.time() - end)\n",
    "        images = images.cuda(non_blocking=True)\n",
    "        target = target.cuda(non_blocking=True)\n",
    "\n",
    "        # compute output\n",
    "        with torch.cuda.amp.autocast():\n",
    "            output = model(images)\n",
    "            loss = criterion(output, target)\n",
    "\n",
    "        # measure accuracy and record loss\n",
    "        #acc1, acc5 = accuracy(output, target, topk=(1, 5))\n",
    "        losses.update(loss.item(), images.size(0))\n",
    "        #top1.update(acc1[0], images.size(0))\n",
    "        #top5.update(acc5[0], images.size(0))\n",
    "\n",
    "        # compute gradient and do SGD step\n",
    "        optimizer.zero_grad()\n",
    "        scaler.scale(loss).backward()\n",
    "        scaler.step(optimizer)\n",
    "        scaler.update()\n",
    "        #loss.backward()\n",
    "        #optimizer.step()\n",
    "\n",
    "        # measure elapsed time\n",
    "        batch_time.update(time.time() - end)\n",
    "        end = time.time()\n",
    "\n",
    "        if i % 50 == 0:\n",
    "            progress.display(i)\n",
    "        if epoch == -1 and i == 50:\n",
    "            break\n",
    "\n",
    "    return losses.avg\n",
    "\n",
    "\n",
    "def validate(val_loader, model, criterion):\n",
    "    batch_time = AverageMeter('Time', ':6.3f')\n",
    "    losses = AverageMeter('Loss', ':.4e')\n",
    "    top1 = AverageMeter('Acc@1', ':6.2f')\n",
    "    top5 = AverageMeter('Acc@5', ':6.2f')\n",
    "    progress = ProgressMeter(\n",
    "        len(val_loader),\n",
    "        [batch_time, losses, top1, top5],\n",
    "        prefix='Test: ')\n",
    "\n",
    "    # switch to evaluate mode\n",
    "    model.eval()\n",
    "\n",
    "    with torch.no_grad():\n",
    "        end = time.time()\n",
    "        for i, (images, target) in enumerate(val_loader):\n",
    "            images = images.cuda(non_blocking=True)\n",
    "            target = target.cuda(non_blocking=True)\n",
    "\n",
    "            # compute output\n",
    "            output = model(images)\n",
    "            loss = criterion(output, target)\n",
    "\n",
    "            # measure accuracy and record loss\n",
    "            acc1, acc5 = accuracy(output, target, topk=(1, 5))\n",
    "            losses.update(loss.item(), images.size(0))\n",
    "            top1.update(acc1[0], images.size(0))\n",
    "            top5.update(acc5[0], images.size(0))\n",
    "\n",
    "            # measure elapsed time\n",
    "            batch_time.update(time.time() - end)\n",
    "            end = time.time()\n",
    "\n",
    "            if i % 50 == 0:\n",
    "                progress.display(i)\n",
    "\n",
    "        # TODO: this should also be done with the ProgressMeter\n",
    "        #print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'\n",
    "        #      .format(top1=top1, top5=top5))\n",
    "\n",
    "    return top1.avg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "698abd39",
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):\n",
    "    torch.save(state, filename)\n",
    "    if is_best:\n",
    "        shutil.copyfile(filename, 'model_best.pth.tar')\n",
    "\n",
    "\n",
    "class AverageMeter(object):\n",
    "    \"\"\"Computes and stores the average and current value\"\"\"\n",
    "    def __init__(self, name, fmt=':f'):\n",
    "        self.name = name\n",
    "        self.fmt = fmt\n",
    "        self.reset()\n",
    "\n",
    "    def reset(self):\n",
    "        self.val = 0\n",
    "        self.avg = 0\n",
    "        self.sum = 0\n",
    "        self.count = 0\n",
    "\n",
    "    def update(self, val, n=1):\n",
    "        self.val = val\n",
    "        self.sum += val * n\n",
    "        self.count += n\n",
    "        self.avg = self.sum / self.count\n",
    "\n",
    "    def __str__(self):\n",
    "        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'\n",
    "        return fmtstr.format(**self.__dict__)\n",
    "\n",
    "\n",
    "class ProgressMeter(object):\n",
    "    def __init__(self, num_batches, meters, prefix=\"\"):\n",
    "        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)\n",
    "        self.meters = meters\n",
    "        self.prefix = prefix\n",
    "\n",
    "    def display(self, batch):\n",
    "        entries = [self.prefix + self.batch_fmtstr.format(batch)]\n",
    "        entries += [str(meter) for meter in self.meters]\n",
    "        print('\\t'.join(entries))\n",
    "\n",
    "    def _get_batch_fmtstr(self, num_batches):\n",
    "        num_digits = len(str(num_batches // 1))\n",
    "        fmt = '{:' + str(num_digits) + 'd}'\n",
    "        return '[' + fmt + '/' + fmt.format(num_batches) + ']'\n",
    "\n",
    "\n",
    "def adjust_learning_rate(optimizer, epoch):\n",
    "    \"\"\"Sets the learning rate to the initial LR decayed by 10 every 30 epochs\"\"\"\n",
    "    lr = LR * (0.1 ** (epoch // 30))\n",
    "    for param_group in optimizer.param_groups:\n",
    "        param_group['lr'] = lr\n",
    "\n",
    "\n",
    "def accuracy(output, target, topk=(1,)):\n",
    "    \"\"\"Computes the accuracy over the k top predictions for the specified values of k\"\"\"\n",
    "    with torch.no_grad():\n",
    "        maxk = max(topk)\n",
    "        batch_size = target.size(0)\n",
    "\n",
    "        _, pred = output.topk(maxk, 1, True, True)\n",
    "        pred = pred.t()\n",
    "        correct = pred.eq(target.view(1, -1).expand_as(pred))\n",
    "\n",
    "        res = []\n",
    "        for k in topk:\n",
    "            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)\n",
    "            res.append(correct_k.mul_(100.0 / batch_size))\n",
    "        return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "dba446b7",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "ResNet(\n",
      "  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
      "  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "  (relu): ReLU(inplace=True)\n",
      "  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
      "  (layer1): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "  )\n",
      "  (layer2): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (3): Bottleneck(\n",
      "      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "  )\n",
      "  (layer3): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "        (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (3): Bottleneck(\n",
      "      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (4): Bottleneck(\n",
      "      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (5): Bottleneck(\n",
      "      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "  )\n",
      "  (layer4): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "        (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "  )\n",
      "  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))\n",
      "  (fc): Linear(in_features=2048, out_features=1000, bias=True)\n",
      ")\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "layer1.0.conv1 torch.Size([64, 64, 1, 1])\n",
      "layer1.0.conv2 torch.Size([64, 64, 3, 3])\n",
      "layer1.0.conv3 torch.Size([256, 64, 1, 1])\n",
      "layer1.0.downsample.0 torch.Size([256, 64, 1, 1])\n",
      "layer1.1.conv1 torch.Size([64, 256, 1, 1])\n",
      "layer1.1.conv2 torch.Size([64, 64, 3, 3])\n",
      "layer1.1.conv3 torch.Size([256, 64, 1, 1])\n",
      "layer1.2.conv1 torch.Size([64, 256, 1, 1])\n",
      "layer1.2.conv2 torch.Size([64, 64, 3, 3])\n",
      "layer1.2.conv3 torch.Size([256, 64, 1, 1])\n",
      "layer2.0.conv1 torch.Size([128, 256, 1, 1])\n",
      "layer2.0.conv2 torch.Size([128, 128, 3, 3])\n",
      "layer2.0.conv3 torch.Size([512, 128, 1, 1])\n",
      "layer2.0.downsample.0 torch.Size([512, 256, 1, 1])\n",
      "layer2.1.conv1 torch.Size([128, 512, 1, 1])\n",
      "layer2.1.conv2 torch.Size([128, 128, 3, 3])\n",
      "layer2.1.conv3 torch.Size([512, 128, 1, 1])\n",
      "layer2.2.conv1 torch.Size([128, 512, 1, 1])\n",
      "layer2.2.conv2 torch.Size([128, 128, 3, 3])\n",
      "layer2.2.conv3 torch.Size([512, 128, 1, 1])\n",
      "layer2.3.conv1 torch.Size([128, 512, 1, 1])\n",
      "layer2.3.conv2 torch.Size([128, 128, 3, 3])\n",
      "layer2.3.conv3 torch.Size([512, 128, 1, 1])\n",
      "layer3.0.conv1 torch.Size([256, 512, 1, 1])\n",
      "layer3.0.conv2 torch.Size([256, 256, 3, 3])\n",
      "layer3.0.conv3 torch.Size([1024, 256, 1, 1])\n",
      "layer3.0.downsample.0 torch.Size([1024, 512, 1, 1])\n",
      "layer3.1.conv1 torch.Size([256, 1024, 1, 1])\n",
      "layer3.1.conv2 torch.Size([256, 256, 3, 3])\n",
      "layer3.1.conv3 torch.Size([1024, 256, 1, 1])\n",
      "layer3.2.conv1 torch.Size([256, 1024, 1, 1])\n",
      "layer3.2.conv2 torch.Size([256, 256, 3, 3])\n",
      "layer3.2.conv3 torch.Size([1024, 256, 1, 1])\n",
      "layer3.3.conv1 torch.Size([256, 1024, 1, 1])\n",
      "layer3.3.conv2 torch.Size([256, 256, 3, 3])\n",
      "layer3.3.conv3 torch.Size([1024, 256, 1, 1])\n",
      "layer3.4.conv1 torch.Size([256, 1024, 1, 1])\n",
      "layer3.4.conv2 torch.Size([256, 256, 3, 3])\n",
      "layer3.4.conv3 torch.Size([1024, 256, 1, 1])\n",
      "layer3.5.conv1 torch.Size([256, 1024, 1, 1])\n",
      "layer3.5.conv2 torch.Size([256, 256, 3, 3])\n",
      "layer3.5.conv3 torch.Size([1024, 256, 1, 1])\n",
      "layer4.0.conv1 torch.Size([512, 1024, 1, 1])\n",
      "layer4.0.conv2 torch.Size([512, 512, 3, 3])\n",
      "layer4.0.conv3 torch.Size([2048, 512, 1, 1])\n",
      "layer4.0.downsample.0 torch.Size([2048, 1024, 1, 1])\n",
      "layer4.1.conv1 torch.Size([512, 2048, 1, 1])\n",
      "layer4.1.conv2 torch.Size([512, 512, 3, 3])\n",
      "layer4.1.conv3 torch.Size([2048, 512, 1, 1])\n",
      "layer4.2.conv1 torch.Size([512, 2048, 1, 1])\n",
      "layer4.2.conv2 torch.Size([512, 512, 3, 3])\n",
      "layer4.2.conv3 torch.Size([2048, 512, 1, 1])\n",
      "tot 23445504\n",
      "Test: [  0/196]\tTime  7.794 ( 7.794)\tLoss 1.0866e+01 (1.0866e+01)\tAcc@1   0.00 (  0.00)\tAcc@5   0.00 (  0.00)\n",
      "Test: [ 50/196]\tTime  0.375 ( 0.519)\tLoss 1.5000e+01 (1.2607e+01)\tAcc@1   0.00 (  0.57)\tAcc@5   0.00 (  2.25)\n",
      "Test: [100/196]\tTime  0.375 ( 0.448)\tLoss 1.1366e+01 (1.2782e+01)\tAcc@1   0.00 (  0.48)\tAcc@5   0.00 (  1.72)\n",
      "Test: [150/196]\tTime  0.376 ( 0.424)\tLoss 1.0232e+01 (1.2573e+01)\tAcc@1   0.00 (  0.39)\tAcc@5   1.17 (  1.61)\n",
      "start acc no bn 0.3799999952316284\n",
      "Epoch: [-1][   0/5005]\tTime  3.164 ( 3.164)\tData  2.389 ( 2.389)\tLoss 2.5513e+00 (2.5513e+00)\n",
      "Epoch: [-1][  50/5005]\tTime  0.558 ( 0.609)\tData  0.000 ( 0.047)\tLoss 2.5676e+00 (2.5204e+00)\n",
      "Test: [  0/196]\tTime  3.366 ( 3.366)\tLoss 1.9885e+00 (1.9885e+00)\tAcc@1  62.50 ( 62.50)\tAcc@5  86.33 ( 86.33)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 2.1062e+00 (2.1067e+00)\tAcc@1  64.45 ( 60.72)\tAcc@5  87.11 ( 83.84)\n",
      "Test: [100/196]\tTime  0.377 ( 0.407)\tLoss 2.5162e+00 (2.2601e+00)\tAcc@1  50.00 ( 57.65)\tAcc@5  76.17 ( 81.59)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 2.4542e+00 (2.3895e+00)\tAcc@1  56.25 ( 55.21)\tAcc@5  76.17 ( 79.03)\n",
      "start acc bn 54.435997009277344 4698510 0.20040132214688156\n",
      "Epoch: [0][   0/5005]\tTime  2.966 ( 2.966)\tData  2.402 ( 2.402)\tLoss 2.6950e+00 (2.6950e+00)\n",
      "Epoch: [0][  50/5005]\tTime  0.559 ( 0.606)\tData  0.000 ( 0.047)\tLoss 1.2517e+00 (1.4937e+00)\n",
      "Epoch: [0][ 100/5005]\tTime  0.559 ( 0.582)\tData  0.000 ( 0.024)\tLoss 1.1292e+00 (1.3988e+00)\n",
      "Epoch: [0][ 150/5005]\tTime  0.558 ( 0.574)\tData  0.000 ( 0.016)\tLoss 1.3272e+00 (1.3571e+00)\n",
      "Epoch: [0][ 200/5005]\tTime  0.559 ( 0.571)\tData  0.000 ( 0.012)\tLoss 1.3258e+00 (1.3304e+00)\n",
      "Epoch: [0][ 250/5005]\tTime  0.558 ( 0.568)\tData  0.000 ( 0.010)\tLoss 1.2106e+00 (1.3091e+00)\n",
      "Epoch: [0][ 300/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 1.2413e+00 (1.2881e+00)\n",
      "Epoch: [0][ 350/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.007)\tLoss 9.4550e-01 (1.2752e+00)\n",
      "Epoch: [0][ 400/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.1637e+00 (1.2657e+00)\n",
      "Epoch: [0][ 450/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.006)\tLoss 1.2226e+00 (1.2556e+00)\n",
      "Epoch: [0][ 500/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.005)\tLoss 1.1134e+00 (1.2471e+00)\n",
      "Epoch: [0][ 550/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.005)\tLoss 1.0511e+00 (1.2412e+00)\n",
      "Epoch: [0][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.2377e+00 (1.2365e+00)\n",
      "Epoch: [0][ 650/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.0552e+00 (1.2306e+00)\n",
      "Epoch: [0][ 700/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.3601e+00 (1.2263e+00)\n",
      "Epoch: [0][ 750/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2952e+00 (1.2227e+00)\n",
      "Epoch: [0][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2759e+00 (1.2181e+00)\n",
      "Epoch: [0][ 850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.2823e+00 (1.2158e+00)\n",
      "Epoch: [0][ 900/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.1295e+00 (1.2120e+00)\n",
      "Epoch: [0][ 950/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.1297e+00 (1.2091e+00)\n",
      "Epoch: [0][1000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.1113e+00 (1.2060e+00)\n",
      "Epoch: [0][1050/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1108e+00 (1.2035e+00)\n",
      "Epoch: [0][1100/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1123e+00 (1.2013e+00)\n",
      "Epoch: [0][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1824e+00 (1.1990e+00)\n",
      "Epoch: [0][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2630e+00 (1.1972e+00)\n",
      "Epoch: [0][1250/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0379e+00 (1.1951e+00)\n",
      "Epoch: [0][1300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0608e+00 (1.1929e+00)\n",
      "Epoch: [0][1350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1463e+00 (1.1904e+00)\n",
      "Epoch: [0][1400/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0694e+00 (1.1890e+00)\n",
      "Epoch: [0][1450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1531e+00 (1.1875e+00)\n",
      "Epoch: [0][1500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0818e+00 (1.1863e+00)\n",
      "Epoch: [0][1550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1364e+00 (1.1845e+00)\n",
      "Epoch: [0][1600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0454e+00 (1.1826e+00)\n",
      "Epoch: [0][1650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1249e+00 (1.1815e+00)\n",
      "Epoch: [0][1700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0219e+00 (1.1805e+00)\n",
      "Epoch: [0][1750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.8875e-01 (1.1789e+00)\n",
      "Epoch: [0][1800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3806e+00 (1.1775e+00)\n",
      "Epoch: [0][1850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3311e+00 (1.1763e+00)\n",
      "Epoch: [0][1900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4356e+00 (1.1752e+00)\n",
      "Epoch: [0][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2117e+00 (1.1737e+00)\n",
      "Epoch: [0][2000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1608e+00 (1.1727e+00)\n",
      "Epoch: [0][2050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0156e+00 (1.1719e+00)\n",
      "Epoch: [0][2100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8069e-01 (1.1713e+00)\n",
      "Epoch: [0][2150/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.5531e+00 (1.1709e+00)\n",
      "Epoch: [0][2200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0845e+00 (1.1699e+00)\n",
      "Epoch: [0][2250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1589e+00 (1.1692e+00)\n",
      "Epoch: [0][2300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0634e+00 (1.1682e+00)\n",
      "Epoch: [0][2350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0109e+00 (1.1674e+00)\n",
      "Epoch: [0][2400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1623e+00 (1.1664e+00)\n",
      "Epoch: [0][2450/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1523e+00 (1.1653e+00)\n",
      "Epoch: [0][2500/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.8567e-01 (1.1645e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [0][2550/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0736e+00 (1.1638e+00)\n",
      "Epoch: [0][2600/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0988e+00 (1.1632e+00)\n",
      "Epoch: [0][2650/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.0896e-01 (1.1628e+00)\n",
      "Epoch: [0][2700/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0670e+00 (1.1624e+00)\n",
      "Epoch: [0][2750/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2468e+00 (1.1619e+00)\n",
      "Epoch: [0][2800/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0274e+00 (1.1613e+00)\n",
      "Epoch: [0][2850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0289e+00 (1.1611e+00)\n",
      "Epoch: [0][2900/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0493e+00 (1.1602e+00)\n",
      "Epoch: [0][2950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0800e+00 (1.1596e+00)\n",
      "Epoch: [0][3000/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1059e+00 (1.1591e+00)\n",
      "Epoch: [0][3050/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.2041e-01 (1.1583e+00)\n",
      "Epoch: [0][3100/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0689e+00 (1.1573e+00)\n",
      "Epoch: [0][3150/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2221e+00 (1.1571e+00)\n",
      "Epoch: [0][3200/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1699e+00 (1.1568e+00)\n",
      "Epoch: [0][3250/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0890e+00 (1.1562e+00)\n",
      "Epoch: [0][3300/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0835e+00 (1.1556e+00)\n",
      "Epoch: [0][3350/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2393e+00 (1.1552e+00)\n",
      "Epoch: [0][3400/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.0703e-01 (1.1548e+00)\n",
      "Epoch: [0][3450/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0656e+00 (1.1544e+00)\n",
      "Epoch: [0][3500/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1863e+00 (1.1540e+00)\n",
      "Epoch: [0][3550/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1631e+00 (1.1540e+00)\n",
      "Epoch: [0][3600/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1746e+00 (1.1535e+00)\n",
      "Epoch: [0][3650/5005]\tTime  0.560 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1165e+00 (1.1533e+00)\n",
      "Epoch: [0][3700/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.7342e-01 (1.1530e+00)\n",
      "Epoch: [0][3750/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1087e+00 (1.1525e+00)\n",
      "Epoch: [0][3800/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0334e+00 (1.1523e+00)\n",
      "Epoch: [0][3850/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0746e+00 (1.1517e+00)\n",
      "Epoch: [0][3900/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.8832e-01 (1.1516e+00)\n",
      "Epoch: [0][3950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0305e+00 (1.1511e+00)\n",
      "Epoch: [0][4000/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1153e+00 (1.1506e+00)\n",
      "Epoch: [0][4050/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.7921e-01 (1.1503e+00)\n",
      "Epoch: [0][4100/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.9387e-01 (1.1499e+00)\n",
      "Epoch: [0][4150/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2176e+00 (1.1498e+00)\n",
      "Epoch: [0][4200/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.5144e-01 (1.1494e+00)\n",
      "Epoch: [0][4250/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0402e+00 (1.1494e+00)\n",
      "Epoch: [0][4300/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2359e+00 (1.1493e+00)\n",
      "Epoch: [0][4350/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0633e+00 (1.1489e+00)\n",
      "Epoch: [0][4400/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0912e+00 (1.1484e+00)\n",
      "Epoch: [0][4450/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1629e+00 (1.1480e+00)\n",
      "Epoch: [0][4500/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0557e+00 (1.1478e+00)\n",
      "Epoch: [0][4550/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2175e+00 (1.1476e+00)\n",
      "Epoch: [0][4600/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2781e+00 (1.1472e+00)\n",
      "Epoch: [0][4650/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2400e+00 (1.1468e+00)\n",
      "Epoch: [0][4700/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0537e+00 (1.1463e+00)\n",
      "Epoch: [0][4750/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.3664e-01 (1.1462e+00)\n",
      "Epoch: [0][4800/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0366e+00 (1.1457e+00)\n",
      "Epoch: [0][4850/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0202e+00 (1.1454e+00)\n",
      "Epoch: [0][4900/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0278e+00 (1.1450e+00)\n",
      "Epoch: [0][4950/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0187e+00 (1.1446e+00)\n",
      "Epoch: [0][5000/5005]\tTime  0.557 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1414e+00 (1.1446e+00)\n",
      "Test: [  0/196]\tTime  3.295 ( 3.295)\tLoss 6.1498e-01 (6.1498e-01)\tAcc@1  83.59 ( 83.59)\tAcc@5  98.83 ( 98.83)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.434)\tLoss 4.9186e-01 (8.7481e-01)\tAcc@1  87.11 ( 76.62)\tAcc@5  97.27 ( 93.90)\n",
      "Test: [100/196]\tTime  0.377 ( 0.406)\tLoss 1.4548e+00 (1.0130e+00)\tAcc@1  62.11 ( 73.57)\tAcc@5  87.89 ( 92.30)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 1.2884e+00 (1.1395e+00)\tAcc@1  74.22 ( 71.25)\tAcc@5  87.89 ( 90.54)\n",
      "epoch 0 1.1445480195366338 70.21199798583984 0.0095 4698510 0.20040132214688156\n",
      "Epoch: [1][   0/5005]\tTime  3.147 ( 3.147)\tData  2.588 ( 2.588)\tLoss 1.1434e+00 (1.1434e+00)\n",
      "Epoch: [1][  50/5005]\tTime  0.558 ( 0.609)\tData  0.000 ( 0.051)\tLoss 1.0393e+00 (1.0634e+00)\n",
      "Epoch: [1][ 100/5005]\tTime  0.559 ( 0.584)\tData  0.000 ( 0.026)\tLoss 1.0567e+00 (1.0726e+00)\n",
      "Epoch: [1][ 150/5005]\tTime  0.563 ( 0.576)\tData  0.000 ( 0.017)\tLoss 1.0925e+00 (1.0677e+00)\n",
      "Epoch: [1][ 200/5005]\tTime  0.558 ( 0.571)\tData  0.000 ( 0.013)\tLoss 1.3311e+00 (1.0630e+00)\n",
      "Epoch: [1][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.010)\tLoss 1.0216e+00 (1.0645e+00)\n",
      "Epoch: [1][ 300/5005]\tTime  0.558 ( 0.567)\tData  0.000 ( 0.009)\tLoss 1.1826e+00 (1.0680e+00)\n",
      "Epoch: [1][ 350/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.008)\tLoss 1.0371e+00 (1.0670e+00)\n",
      "Epoch: [1][ 400/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.007)\tLoss 1.0437e+00 (1.0664e+00)\n",
      "Epoch: [1][ 450/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.006)\tLoss 1.1622e+00 (1.0654e+00)\n",
      "Epoch: [1][ 500/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.3938e-01 (1.0634e+00)\n",
      "Epoch: [1][ 550/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.005)\tLoss 8.7466e-01 (1.0647e+00)\n",
      "Epoch: [1][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0429e+00 (1.0654e+00)\n",
      "Epoch: [1][ 650/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.004)\tLoss 9.9726e-01 (1.0635e+00)\n",
      "Epoch: [1][ 700/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.004)\tLoss 9.1938e-01 (1.0630e+00)\n",
      "Epoch: [1][ 750/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.0946e+00 (1.0626e+00)\n",
      "Epoch: [1][ 800/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.8641e-01 (1.0622e+00)\n",
      "Epoch: [1][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1112e+00 (1.0622e+00)\n",
      "Epoch: [1][ 900/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.1732e+00 (1.0622e+00)\n",
      "Epoch: [1][ 950/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.6624e-01 (1.0615e+00)\n",
      "Epoch: [1][1000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.7635e-01 (1.0617e+00)\n",
      "Epoch: [1][1050/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.1435e+00 (1.0619e+00)\n",
      "Epoch: [1][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.0497e+00 (1.0611e+00)\n",
      "Epoch: [1][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5154e-01 (1.0622e+00)\n",
      "Epoch: [1][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0286e+00 (1.0627e+00)\n",
      "Epoch: [1][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0001e+00 (1.0627e+00)\n",
      "Epoch: [1][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0048e+00 (1.0629e+00)\n",
      "Epoch: [1][1350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6232e-01 (1.0632e+00)\n",
      "Epoch: [1][1400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1764e+00 (1.0640e+00)\n",
      "Epoch: [1][1450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.5124e+00 (1.0639e+00)\n",
      "Epoch: [1][1500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.4156e+00 (1.0640e+00)\n",
      "Epoch: [1][1550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1100e+00 (1.0646e+00)\n",
      "Epoch: [1][1600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0401e+00 (1.0658e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [1][1650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1183e+00 (1.0667e+00)\n",
      "Epoch: [1][1700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1739e+00 (1.0671e+00)\n",
      "Epoch: [1][1750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.2097e+00 (1.0676e+00)\n",
      "Epoch: [1][1800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.2935e+00 (1.0683e+00)\n",
      "Epoch: [1][1850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.2205e+00 (1.0682e+00)\n",
      "Epoch: [1][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1254e+00 (1.0685e+00)\n",
      "Epoch: [1][1950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3243e+00 (1.0686e+00)\n",
      "Epoch: [1][2000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1900e+00 (1.0695e+00)\n",
      "Epoch: [1][2050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6963e-01 (1.0697e+00)\n",
      "Epoch: [1][2100/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0618e+00 (1.0693e+00)\n",
      "Epoch: [1][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1612e+00 (1.0697e+00)\n",
      "Epoch: [1][2200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0446e+00 (1.0700e+00)\n",
      "Epoch: [1][2250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0182e+00 (1.0702e+00)\n",
      "Epoch: [1][2300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0314e+00 (1.0701e+00)\n",
      "Epoch: [1][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1465e+00 (1.0703e+00)\n",
      "Epoch: [1][2400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1575e+00 (1.0705e+00)\n",
      "Epoch: [1][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1921e+00 (1.0707e+00)\n",
      "Epoch: [1][2500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1155e+00 (1.0708e+00)\n",
      "Epoch: [1][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1590e+00 (1.0712e+00)\n",
      "Epoch: [1][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0174e+00 (1.0714e+00)\n",
      "Epoch: [1][2650/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1079e+00 (1.0715e+00)\n",
      "Epoch: [1][2700/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0900e+00 (1.0721e+00)\n",
      "Epoch: [1][2750/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.7605e-01 (1.0718e+00)\n",
      "Epoch: [1][2800/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1899e+00 (1.0719e+00)\n",
      "Epoch: [1][2850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2422e+00 (1.0720e+00)\n",
      "Epoch: [1][2900/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2046e+00 (1.0723e+00)\n",
      "Epoch: [1][2950/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.9172e-01 (1.0725e+00)\n",
      "Epoch: [1][3000/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.8707e-01 (1.0725e+00)\n",
      "Epoch: [1][3050/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0335e+00 (1.0727e+00)\n",
      "Epoch: [1][3100/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0705e+00 (1.0727e+00)\n",
      "Epoch: [1][3150/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0480e+00 (1.0726e+00)\n",
      "Epoch: [1][3200/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1146e+00 (1.0726e+00)\n",
      "Epoch: [1][3250/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0617e+00 (1.0729e+00)\n",
      "Epoch: [1][3300/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0116e+00 (1.0729e+00)\n",
      "Epoch: [1][3350/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0297e+00 (1.0729e+00)\n",
      "Epoch: [1][3400/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1097e+00 (1.0727e+00)\n",
      "Epoch: [1][3450/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1380e+00 (1.0727e+00)\n",
      "Epoch: [1][3500/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2695e+00 (1.0728e+00)\n",
      "Epoch: [1][3550/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0076e+00 (1.0729e+00)\n",
      "Epoch: [1][3600/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1941e+00 (1.0729e+00)\n",
      "Epoch: [1][3650/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0217e+00 (1.0734e+00)\n",
      "Epoch: [1][3700/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2758e+00 (1.0737e+00)\n",
      "Epoch: [1][3750/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0693e+00 (1.0742e+00)\n",
      "Epoch: [1][3800/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1189e+00 (1.0746e+00)\n",
      "Epoch: [1][3850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1574e+00 (1.0748e+00)\n",
      "Epoch: [1][3900/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0570e+00 (1.0752e+00)\n",
      "Epoch: [1][3950/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.9230e-01 (1.0749e+00)\n",
      "Epoch: [1][4000/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0116e+00 (1.0747e+00)\n",
      "Epoch: [1][4050/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1399e+00 (1.0751e+00)\n",
      "Epoch: [1][4100/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.7028e-01 (1.0753e+00)\n",
      "Epoch: [1][4150/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1270e+00 (1.0754e+00)\n",
      "Epoch: [1][4200/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1839e+00 (1.0756e+00)\n",
      "Epoch: [1][4250/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0965e+00 (1.0754e+00)\n",
      "Epoch: [1][4300/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0489e+00 (1.0753e+00)\n",
      "Epoch: [1][4350/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0051e+00 (1.0755e+00)\n",
      "Epoch: [1][4400/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1286e+00 (1.0757e+00)\n",
      "Epoch: [1][4450/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2028e+00 (1.0758e+00)\n",
      "Epoch: [1][4500/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.7550e-01 (1.0759e+00)\n",
      "Epoch: [1][4550/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.9923e-01 (1.0762e+00)\n",
      "Epoch: [1][4600/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2185e+00 (1.0762e+00)\n",
      "Epoch: [1][4650/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.4217e-01 (1.0762e+00)\n",
      "Epoch: [1][4700/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2739e+00 (1.0765e+00)\n",
      "Epoch: [1][4750/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1906e+00 (1.0768e+00)\n",
      "Epoch: [1][4800/5005]\tTime  0.560 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0832e+00 (1.0767e+00)\n",
      "Epoch: [1][4850/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1358e+00 (1.0770e+00)\n",
      "Epoch: [1][4900/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0044e+00 (1.0769e+00)\n",
      "Epoch: [1][4950/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.3799e-01 (1.0769e+00)\n",
      "Epoch: [1][5000/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0957e+00 (1.0769e+00)\n",
      "Test: [  0/196]\tTime  3.304 ( 3.304)\tLoss 6.4085e-01 (6.4085e-01)\tAcc@1  82.81 ( 82.81)\tAcc@5  95.70 ( 95.70)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 4.9244e-01 (8.6065e-01)\tAcc@1  87.11 ( 77.09)\tAcc@5  97.27 ( 93.91)\n",
      "Test: [100/196]\tTime  0.378 ( 0.406)\tLoss 1.7216e+00 (9.8586e-01)\tAcc@1  53.91 ( 74.36)\tAcc@5  85.55 ( 92.66)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 1.2413e+00 (1.1179e+00)\tAcc@1  70.70 ( 71.69)\tAcc@5  86.72 ( 90.87)\n",
      "epoch 1 1.0768750126684925 70.81999969482422 0.009000000000000001 4698510 0.20040132214688156\n",
      "Epoch: [2][   0/5005]\tTime  3.128 ( 3.128)\tData  2.568 ( 2.568)\tLoss 1.0112e+00 (1.0112e+00)\n",
      "Epoch: [2][  50/5005]\tTime  0.558 ( 0.609)\tData  0.000 ( 0.051)\tLoss 1.0328e+00 (1.0292e+00)\n",
      "Epoch: [2][ 100/5005]\tTime  0.559 ( 0.584)\tData  0.000 ( 0.026)\tLoss 9.7673e-01 (1.0233e+00)\n",
      "Epoch: [2][ 150/5005]\tTime  0.558 ( 0.576)\tData  0.000 ( 0.017)\tLoss 1.1205e+00 (1.0229e+00)\n",
      "Epoch: [2][ 200/5005]\tTime  0.558 ( 0.571)\tData  0.000 ( 0.013)\tLoss 1.0338e+00 (1.0275e+00)\n",
      "Epoch: [2][ 250/5005]\tTime  0.558 ( 0.569)\tData  0.000 ( 0.010)\tLoss 1.0349e+00 (1.0287e+00)\n",
      "Epoch: [2][ 300/5005]\tTime  0.558 ( 0.567)\tData  0.000 ( 0.009)\tLoss 9.5995e-01 (1.0267e+00)\n",
      "Epoch: [2][ 350/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.008)\tLoss 1.1580e+00 (1.0262e+00)\n",
      "Epoch: [2][ 400/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.007)\tLoss 9.6593e-01 (1.0267e+00)\n",
      "Epoch: [2][ 450/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.006)\tLoss 1.2838e+00 (1.0252e+00)\n",
      "Epoch: [2][ 500/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0094e+00 (1.0259e+00)\n",
      "Epoch: [2][ 550/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.005)\tLoss 1.0156e+00 (1.0255e+00)\n",
      "Epoch: [2][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.3179e-01 (1.0273e+00)\n",
      "Epoch: [2][ 650/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.0190e+00 (1.0285e+00)\n",
      "Epoch: [2][ 700/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 8.7522e-01 (1.0293e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [2][ 750/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.1033e+00 (1.0289e+00)\n",
      "Epoch: [2][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1044e+00 (1.0291e+00)\n",
      "Epoch: [2][ 850/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.0012e+00 (1.0304e+00)\n",
      "Epoch: [2][ 900/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.5912e-01 (1.0298e+00)\n",
      "Epoch: [2][ 950/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.0624e+00 (1.0298e+00)\n",
      "Epoch: [2][1000/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.0298e+00 (1.0300e+00)\n",
      "Epoch: [2][1050/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.1333e+00 (1.0315e+00)\n",
      "Epoch: [2][1100/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.9054e-01 (1.0306e+00)\n",
      "Epoch: [2][1150/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1295e+00 (1.0315e+00)\n",
      "Epoch: [2][1200/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1133e+00 (1.0315e+00)\n",
      "Epoch: [2][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1822e+00 (1.0322e+00)\n",
      "Epoch: [2][1300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.1608e-01 (1.0334e+00)\n",
      "Epoch: [2][1350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6958e-01 (1.0335e+00)\n",
      "Epoch: [2][1400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0252e+00 (1.0333e+00)\n",
      "Epoch: [2][1450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1289e+00 (1.0340e+00)\n",
      "Epoch: [2][1500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.4659e-01 (1.0344e+00)\n",
      "Epoch: [2][1550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1416e+00 (1.0347e+00)\n",
      "Epoch: [2][1600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.2144e+00 (1.0353e+00)\n",
      "Epoch: [2][1650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.4284e-01 (1.0354e+00)\n",
      "Epoch: [2][1700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6611e-01 (1.0355e+00)\n",
      "Epoch: [2][1750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.7877e-01 (1.0351e+00)\n",
      "Epoch: [2][1800/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.2498e+00 (1.0352e+00)\n",
      "Epoch: [2][1850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1351e+00 (1.0359e+00)\n",
      "Epoch: [2][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1272e+00 (1.0354e+00)\n",
      "Epoch: [2][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0256e+00 (1.0354e+00)\n",
      "Epoch: [2][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0635e+00 (1.0359e+00)\n",
      "Epoch: [2][2050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0300e+00 (1.0363e+00)\n",
      "Epoch: [2][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1849e+00 (1.0362e+00)\n",
      "Epoch: [2][2150/5005]\tTime  0.556 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2640e-01 (1.0368e+00)\n",
      "Epoch: [2][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0930e+00 (1.0373e+00)\n",
      "Epoch: [2][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0039e+00 (1.0378e+00)\n",
      "Epoch: [2][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6138e-01 (1.0380e+00)\n",
      "Epoch: [2][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0929e+00 (1.0381e+00)\n",
      "Epoch: [2][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1107e+00 (1.0380e+00)\n",
      "Epoch: [2][2450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0883e+00 (1.0385e+00)\n",
      "Epoch: [2][2500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0734e+00 (1.0389e+00)\n",
      "Epoch: [2][2550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0203e+00 (1.0389e+00)\n",
      "Epoch: [2][2600/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2055e+00 (1.0386e+00)\n",
      "Epoch: [2][2650/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0214e+00 (1.0387e+00)\n",
      "Epoch: [2][2700/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1118e+00 (1.0390e+00)\n",
      "Epoch: [2][2750/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.4750e-01 (1.0396e+00)\n",
      "Epoch: [2][2800/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0404e+00 (1.0396e+00)\n",
      "Epoch: [2][2850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.9719e-01 (1.0401e+00)\n",
      "Epoch: [2][2900/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1174e+00 (1.0400e+00)\n",
      "Epoch: [2][2950/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.5513e-01 (1.0402e+00)\n",
      "Epoch: [2][3000/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.5979e-01 (1.0404e+00)\n",
      "Epoch: [2][3050/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.5070e-01 (1.0406e+00)\n",
      "Epoch: [2][3100/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0495e+00 (1.0409e+00)\n",
      "Epoch: [2][3150/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1749e+00 (1.0411e+00)\n",
      "Epoch: [2][3200/5005]\tTime  0.560 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1934e+00 (1.0410e+00)\n",
      "Epoch: [2][3250/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2290e+00 (1.0412e+00)\n",
      "Epoch: [2][3300/5005]\tTime  0.560 ( 0.559)\tData  0.001 ( 0.001)\tLoss 1.1461e+00 (1.0418e+00)\n",
      "Epoch: [2][3350/5005]\tTime  0.560 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.5593e-01 (1.0417e+00)\n",
      "Epoch: [2][3400/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0860e+00 (1.0416e+00)\n",
      "Epoch: [2][3450/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0924e+00 (1.0416e+00)\n",
      "Epoch: [2][3500/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1575e+00 (1.0415e+00)\n",
      "Epoch: [2][3550/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.7158e-01 (1.0417e+00)\n",
      "Epoch: [2][3600/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1316e+00 (1.0417e+00)\n",
      "Epoch: [2][3650/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.6387e-01 (1.0419e+00)\n",
      "Epoch: [2][3700/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.4187e-01 (1.0423e+00)\n",
      "Epoch: [2][3750/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.5815e-01 (1.0425e+00)\n",
      "Epoch: [2][3800/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2364e+00 (1.0431e+00)\n",
      "Epoch: [2][3850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1746e+00 (1.0434e+00)\n",
      "Epoch: [2][3900/5005]\tTime  0.561 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.9224e-01 (1.0440e+00)\n",
      "Epoch: [2][3950/5005]\tTime  0.561 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.9203e-01 (1.0436e+00)\n",
      "Epoch: [2][4000/5005]\tTime  0.561 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.4644e-01 (1.0440e+00)\n",
      "Epoch: [2][4050/5005]\tTime  0.561 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.2400e-01 (1.0439e+00)\n",
      "Epoch: [2][4100/5005]\tTime  0.561 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0102e+00 (1.0438e+00)\n",
      "Epoch: [2][4150/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1502e+00 (1.0438e+00)\n",
      "Epoch: [2][4200/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2401e+00 (1.0438e+00)\n",
      "Epoch: [2][4250/5005]\tTime  0.562 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.4120e-01 (1.0439e+00)\n",
      "Epoch: [2][4300/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0680e+00 (1.0440e+00)\n",
      "Epoch: [2][4350/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2462e+00 (1.0443e+00)\n",
      "Epoch: [2][4400/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0627e+00 (1.0445e+00)\n",
      "Epoch: [2][4450/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0009e+00 (1.0446e+00)\n",
      "Epoch: [2][4500/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2563e+00 (1.0446e+00)\n",
      "Epoch: [2][4550/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.9738e-01 (1.0445e+00)\n",
      "Epoch: [2][4600/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0126e+00 (1.0447e+00)\n",
      "Epoch: [2][4650/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.0298e-01 (1.0448e+00)\n",
      "Epoch: [2][4700/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0116e+00 (1.0451e+00)\n",
      "Epoch: [2][4750/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.8413e-01 (1.0454e+00)\n",
      "Epoch: [2][4800/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1183e+00 (1.0455e+00)\n",
      "Epoch: [2][4850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.8023e-01 (1.0455e+00)\n",
      "Epoch: [2][4900/5005]\tTime  0.561 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1394e+00 (1.0456e+00)\n",
      "Epoch: [2][4950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0043e+00 (1.0459e+00)\n",
      "Epoch: [2][5000/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1129e+00 (1.0463e+00)\n",
      "Test: [  0/196]\tTime  3.970 ( 3.970)\tLoss 6.0279e-01 (6.0279e-01)\tAcc@1  83.98 ( 83.98)\tAcc@5  96.88 ( 96.88)\n",
      "Test: [ 50/196]\tTime  0.378 ( 0.448)\tLoss 6.9321e-01 (8.5034e-01)\tAcc@1  80.47 ( 77.41)\tAcc@5  95.31 ( 94.22)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test: [100/196]\tTime  0.377 ( 0.413)\tLoss 1.3742e+00 (9.8111e-01)\tAcc@1  66.80 ( 74.44)\tAcc@5  87.11 ( 92.66)\n",
      "Test: [150/196]\tTime  0.378 ( 0.401)\tLoss 1.1886e+00 (1.1128e+00)\tAcc@1  73.44 ( 71.85)\tAcc@5  87.89 ( 90.95)\n",
      "epoch 2 1.0462989768734747 71.06999969482422 0.0085 4698510 0.20040132214688156\n",
      "Epoch: [3][   0/5005]\tTime  3.221 ( 3.221)\tData  2.657 ( 2.657)\tLoss 1.2142e+00 (1.2142e+00)\n",
      "Epoch: [3][  50/5005]\tTime  0.558 ( 0.611)\tData  0.000 ( 0.052)\tLoss 9.5621e-01 (1.0098e+00)\n",
      "Epoch: [3][ 100/5005]\tTime  0.558 ( 0.585)\tData  0.000 ( 0.027)\tLoss 8.8497e-01 (1.0148e+00)\n",
      "Epoch: [3][ 150/5005]\tTime  0.558 ( 0.576)\tData  0.000 ( 0.018)\tLoss 1.0436e+00 (1.0206e+00)\n",
      "Epoch: [3][ 200/5005]\tTime  0.558 ( 0.572)\tData  0.000 ( 0.013)\tLoss 1.0050e+00 (1.0080e+00)\n",
      "Epoch: [3][ 250/5005]\tTime  0.558 ( 0.569)\tData  0.000 ( 0.011)\tLoss 1.0820e+00 (1.0065e+00)\n",
      "Epoch: [3][ 300/5005]\tTime  0.558 ( 0.567)\tData  0.000 ( 0.009)\tLoss 1.1982e+00 (1.0057e+00)\n",
      "Epoch: [3][ 350/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.008)\tLoss 9.5036e-01 (1.0017e+00)\n",
      "Epoch: [3][ 400/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.007)\tLoss 8.9041e-01 (1.0042e+00)\n",
      "Epoch: [3][ 450/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.006)\tLoss 9.5122e-01 (1.0038e+00)\n",
      "Epoch: [3][ 500/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.8865e-01 (1.0026e+00)\n",
      "Epoch: [3][ 550/5005]\tTime  0.562 ( 0.563)\tData  0.000 ( 0.005)\tLoss 1.1057e+00 (1.0018e+00)\n",
      "Epoch: [3][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.005)\tLoss 8.9641e-01 (1.0025e+00)\n",
      "Epoch: [3][ 650/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1820e+00 (1.0058e+00)\n",
      "Epoch: [3][ 700/5005]\tTime  0.559 ( 0.562)\tData  0.001 ( 0.004)\tLoss 9.2840e-01 (1.0041e+00)\n",
      "Epoch: [3][ 750/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.004)\tLoss 8.6656e-01 (1.0048e+00)\n",
      "Epoch: [3][ 800/5005]\tTime  0.561 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.1495e+00 (1.0052e+00)\n",
      "Epoch: [3][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.9337e-01 (1.0064e+00)\n",
      "Epoch: [3][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0217e+00 (1.0059e+00)\n",
      "Epoch: [3][ 950/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1487e+00 (1.0059e+00)\n",
      "Epoch: [3][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1633e+00 (1.0066e+00)\n",
      "Epoch: [3][1050/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.3029e-01 (1.0065e+00)\n",
      "Epoch: [3][1100/5005]\tTime  0.557 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.0099e+00 (1.0076e+00)\n",
      "Epoch: [3][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.1088e+00 (1.0072e+00)\n",
      "Epoch: [3][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1157e+00 (1.0082e+00)\n",
      "Epoch: [3][1250/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8172e-01 (1.0088e+00)\n",
      "Epoch: [3][1300/5005]\tTime  0.557 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.9686e-01 (1.0095e+00)\n",
      "Epoch: [3][1350/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1268e+00 (1.0100e+00)\n",
      "Epoch: [3][1400/5005]\tTime  0.561 ( 0.561)\tData  0.001 ( 0.002)\tLoss 1.0864e+00 (1.0105e+00)\n",
      "Epoch: [3][1450/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.4079e-01 (1.0095e+00)\n",
      "Epoch: [3][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0629e+00 (1.0102e+00)\n",
      "Epoch: [3][1550/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1189e+00 (1.0102e+00)\n",
      "Epoch: [3][1600/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0534e+00 (1.0111e+00)\n",
      "Epoch: [3][1650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.1314e-01 (1.0115e+00)\n",
      "Epoch: [3][1700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.2088e+00 (1.0119e+00)\n",
      "Epoch: [3][1750/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6795e-01 (1.0122e+00)\n",
      "Epoch: [3][1800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1162e+00 (1.0123e+00)\n",
      "Epoch: [3][1850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0423e+00 (1.0122e+00)\n",
      "Epoch: [3][1900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.1099e-01 (1.0128e+00)\n",
      "Epoch: [3][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6094e-01 (1.0129e+00)\n",
      "Epoch: [3][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0281e+00 (1.0130e+00)\n",
      "Epoch: [3][2050/5005]\tTime  0.557 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1313e+00 (1.0137e+00)\n",
      "Epoch: [3][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1329e+00 (1.0146e+00)\n",
      "Epoch: [3][2150/5005]\tTime  0.557 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7013e-01 (1.0145e+00)\n",
      "Epoch: [3][2200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9443e-01 (1.0151e+00)\n",
      "Epoch: [3][2250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0560e+00 (1.0149e+00)\n",
      "Epoch: [3][2300/5005]\tTime  0.557 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0640e+00 (1.0152e+00)\n",
      "Epoch: [3][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0393e+00 (1.0158e+00)\n",
      "Epoch: [3][2400/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0484e+00 (1.0161e+00)\n",
      "Epoch: [3][2450/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0524e+00 (1.0163e+00)\n",
      "Epoch: [3][2500/5005]\tTime  0.557 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1280e+00 (1.0170e+00)\n",
      "Epoch: [3][2550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8097e-01 (1.0172e+00)\n",
      "Epoch: [3][2600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2815e+00 (1.0173e+00)\n",
      "Epoch: [3][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3655e-01 (1.0171e+00)\n",
      "Epoch: [3][2700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0224e+00 (1.0179e+00)\n",
      "Epoch: [3][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5060e-01 (1.0183e+00)\n",
      "Epoch: [3][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1913e+00 (1.0190e+00)\n",
      "Epoch: [3][2850/5005]\tTime  0.557 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1150e+00 (1.0194e+00)\n",
      "Epoch: [3][2900/5005]\tTime  0.557 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0304e-01 (1.0192e+00)\n",
      "Epoch: [3][2950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0868e+00 (1.0190e+00)\n",
      "Epoch: [3][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2862e+00 (1.0189e+00)\n",
      "Epoch: [3][3050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1130e+00 (1.0192e+00)\n",
      "Epoch: [3][3100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2389e+00 (1.0197e+00)\n",
      "Epoch: [3][3150/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1064e+00 (1.0200e+00)\n",
      "Epoch: [3][3200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0716e-01 (1.0202e+00)\n",
      "Epoch: [3][3250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5306e-01 (1.0206e+00)\n",
      "Epoch: [3][3300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0971e+00 (1.0204e+00)\n",
      "Epoch: [3][3350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1621e+00 (1.0205e+00)\n",
      "Epoch: [3][3400/5005]\tTime  0.561 ( 0.560)\tData  0.001 ( 0.001)\tLoss 9.8381e-01 (1.0209e+00)\n",
      "Epoch: [3][3450/5005]\tTime  0.560 ( 0.560)\tData  0.001 ( 0.001)\tLoss 1.0876e+00 (1.0212e+00)\n",
      "Epoch: [3][3500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1299e+00 (1.0217e+00)\n",
      "Epoch: [3][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2828e-01 (1.0218e+00)\n",
      "Epoch: [3][3600/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1409e+00 (1.0221e+00)\n",
      "Epoch: [3][3650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6847e-01 (1.0222e+00)\n",
      "Epoch: [3][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0708e+00 (1.0223e+00)\n",
      "Epoch: [3][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0943e+00 (1.0226e+00)\n",
      "Epoch: [3][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9159e-01 (1.0231e+00)\n",
      "Epoch: [3][3850/5005]\tTime  0.557 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1456e+00 (1.0230e+00)\n",
      "Epoch: [3][3900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0744e+00 (1.0230e+00)\n",
      "Epoch: [3][3950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7482e-01 (1.0236e+00)\n",
      "Epoch: [3][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0243e+00 (1.0239e+00)\n",
      "Epoch: [3][4050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0405e+00 (1.0237e+00)\n",
      "Epoch: [3][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1518e+00 (1.0236e+00)\n",
      "Epoch: [3][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6284e-01 (1.0239e+00)\n",
      "Epoch: [3][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9749e-01 (1.0241e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [3][4250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2525e-01 (1.0240e+00)\n",
      "Epoch: [3][4300/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0136e+00 (1.0242e+00)\n",
      "Epoch: [3][4350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0841e+00 (1.0242e+00)\n",
      "Epoch: [3][4400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2150e+00 (1.0241e+00)\n",
      "Epoch: [3][4450/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1713e+00 (1.0242e+00)\n",
      "Epoch: [3][4500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3674e-01 (1.0241e+00)\n",
      "Epoch: [3][4550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2302e+00 (1.0240e+00)\n",
      "Epoch: [3][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1086e+00 (1.0240e+00)\n",
      "Epoch: [3][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0767e+00 (1.0241e+00)\n",
      "Epoch: [3][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0596e+00 (1.0243e+00)\n",
      "Epoch: [3][4750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0693e+00 (1.0242e+00)\n",
      "Epoch: [3][4800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1744e+00 (1.0243e+00)\n",
      "Epoch: [3][4850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2835e+00 (1.0246e+00)\n",
      "Epoch: [3][4900/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0386e+00 (1.0247e+00)\n",
      "Epoch: [3][4950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1867e+00 (1.0249e+00)\n",
      "Epoch: [3][5000/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0217e+00 (1.0250e+00)\n",
      "Test: [  0/196]\tTime  3.391 ( 3.391)\tLoss 6.3943e-01 (6.3943e-01)\tAcc@1  80.86 ( 80.86)\tAcc@5  96.48 ( 96.48)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.436)\tLoss 5.1129e-01 (8.5023e-01)\tAcc@1  87.50 ( 77.54)\tAcc@5  96.88 ( 94.11)\n",
      "Test: [100/196]\tTime  0.377 ( 0.407)\tLoss 1.5865e+00 (9.7698e-01)\tAcc@1  57.03 ( 74.62)\tAcc@5  83.20 ( 92.62)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 1.2660e+00 (1.1014e+00)\tAcc@1  71.48 ( 72.17)\tAcc@5  86.72 ( 91.05)\n",
      "epoch 3 1.0250365165933228 71.15399932861328 0.008000000000000002 4698510 0.20040132214688156\n",
      "Epoch: [4][   0/5005]\tTime  3.060 ( 3.060)\tData  2.500 ( 2.500)\tLoss 7.9279e-01 (7.9279e-01)\n",
      "Epoch: [4][  50/5005]\tTime  0.559 ( 0.609)\tData  0.000 ( 0.049)\tLoss 9.1954e-01 (9.7512e-01)\n",
      "Epoch: [4][ 100/5005]\tTime  0.559 ( 0.584)\tData  0.000 ( 0.025)\tLoss 8.7946e-01 (9.7662e-01)\n",
      "Epoch: [4][ 150/5005]\tTime  0.559 ( 0.576)\tData  0.000 ( 0.017)\tLoss 9.3388e-01 (9.7787e-01)\n",
      "Epoch: [4][ 200/5005]\tTime  0.559 ( 0.571)\tData  0.000 ( 0.013)\tLoss 1.0407e+00 (9.7441e-01)\n",
      "Epoch: [4][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.010)\tLoss 9.8427e-01 (9.7368e-01)\n",
      "Epoch: [4][ 300/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.009)\tLoss 1.0693e+00 (9.7839e-01)\n",
      "Epoch: [4][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.0130e+00 (9.8015e-01)\n",
      "Epoch: [4][ 400/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.0488e+00 (9.8198e-01)\n",
      "Epoch: [4][ 450/5005]\tTime  0.560 ( 0.565)\tData  0.000 ( 0.006)\tLoss 8.4453e-01 (9.8368e-01)\n",
      "Epoch: [4][ 500/5005]\tTime  0.562 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0346e+00 (9.8389e-01)\n",
      "Epoch: [4][ 550/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.005)\tLoss 7.6684e-01 (9.8413e-01)\n",
      "Epoch: [4][ 600/5005]\tTime  0.562 ( 0.564)\tData  0.000 ( 0.004)\tLoss 1.1574e+00 (9.8412e-01)\n",
      "Epoch: [4][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.5133e-01 (9.8473e-01)\n",
      "Epoch: [4][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.4910e-01 (9.8734e-01)\n",
      "Epoch: [4][ 750/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0985e+00 (9.8641e-01)\n",
      "Epoch: [4][ 800/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.8973e-01 (9.8631e-01)\n",
      "Epoch: [4][ 850/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.7948e-01 (9.8615e-01)\n",
      "Epoch: [4][ 900/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1005e+00 (9.8718e-01)\n",
      "Epoch: [4][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0022e+00 (9.8646e-01)\n",
      "Epoch: [4][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1490e+00 (9.8754e-01)\n",
      "Epoch: [4][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0081e+00 (9.8775e-01)\n",
      "Epoch: [4][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.8018e-01 (9.8802e-01)\n",
      "Epoch: [4][1150/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0705e+00 (9.8816e-01)\n",
      "Epoch: [4][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.4164e-01 (9.8870e-01)\n",
      "Epoch: [4][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8153e-01 (9.9028e-01)\n",
      "Epoch: [4][1300/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5393e-01 (9.9047e-01)\n",
      "Epoch: [4][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1385e+00 (9.9082e-01)\n",
      "Epoch: [4][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.2391e-01 (9.9138e-01)\n",
      "Epoch: [4][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0111e+00 (9.9172e-01)\n",
      "Epoch: [4][1500/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0056e+00 (9.9263e-01)\n",
      "Epoch: [4][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.3159e-01 (9.9231e-01)\n",
      "Epoch: [4][1600/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.6983e-01 (9.9271e-01)\n",
      "Epoch: [4][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.1564e-01 (9.9253e-01)\n",
      "Epoch: [4][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0585e+00 (9.9297e-01)\n",
      "Epoch: [4][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.9236e-01 (9.9275e-01)\n",
      "Epoch: [4][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.2576e-01 (9.9251e-01)\n",
      "Epoch: [4][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0222e+00 (9.9220e-01)\n",
      "Epoch: [4][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6327e-01 (9.9257e-01)\n",
      "Epoch: [4][1950/5005]\tTime  0.562 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8556e-01 (9.9230e-01)\n",
      "Epoch: [4][2000/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.001)\tLoss 1.0053e+00 (9.9226e-01)\n",
      "Epoch: [4][2050/5005]\tTime  0.562 ( 0.561)\tData  0.001 ( 0.001)\tLoss 1.0080e+00 (9.9265e-01)\n",
      "Epoch: [4][2100/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.001)\tLoss 8.9275e-01 (9.9299e-01)\n",
      "Epoch: [4][2150/5005]\tTime  0.563 ( 0.561)\tData  0.000 ( 0.001)\tLoss 1.0026e+00 (9.9293e-01)\n",
      "Epoch: [4][2200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 8.2865e-01 (9.9285e-01)\n",
      "Epoch: [4][2250/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.001)\tLoss 9.7781e-01 (9.9302e-01)\n",
      "Epoch: [4][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1092e+00 (9.9361e-01)\n",
      "Epoch: [4][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3356e+00 (9.9390e-01)\n",
      "Epoch: [4][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0846e+00 (9.9397e-01)\n",
      "Epoch: [4][2450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0757e+00 (9.9404e-01)\n",
      "Epoch: [4][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1575e+00 (9.9389e-01)\n",
      "Epoch: [4][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6506e-01 (9.9368e-01)\n",
      "Epoch: [4][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9303e-01 (9.9348e-01)\n",
      "Epoch: [4][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0134e+00 (9.9416e-01)\n",
      "Epoch: [4][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0916e+00 (9.9414e-01)\n",
      "Epoch: [4][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2259e+00 (9.9457e-01)\n",
      "Epoch: [4][2800/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8781e-01 (9.9474e-01)\n",
      "Epoch: [4][2850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3650e-01 (9.9506e-01)\n",
      "Epoch: [4][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0429e+00 (9.9546e-01)\n",
      "Epoch: [4][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1006e+00 (9.9545e-01)\n",
      "Epoch: [4][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3678e-01 (9.9567e-01)\n",
      "Epoch: [4][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1346e+00 (9.9621e-01)\n",
      "Epoch: [4][3100/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0064e+00 (9.9710e-01)\n",
      "Epoch: [4][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0434e+00 (9.9772e-01)\n",
      "Epoch: [4][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0076e+00 (9.9770e-01)\n",
      "Epoch: [4][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2953e-01 (9.9767e-01)\n",
      "Epoch: [4][3300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0008e+00 (9.9783e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [4][3350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9542e-01 (9.9807e-01)\n",
      "Epoch: [4][3400/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4848e-01 (9.9834e-01)\n",
      "Epoch: [4][3450/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1386e+00 (9.9825e-01)\n",
      "Epoch: [4][3500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0120e+00 (9.9817e-01)\n",
      "Epoch: [4][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7283e-01 (9.9812e-01)\n",
      "Epoch: [4][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1716e-01 (9.9833e-01)\n",
      "Epoch: [4][3650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1992e+00 (9.9865e-01)\n",
      "Epoch: [4][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7073e-01 (9.9879e-01)\n",
      "Epoch: [4][3750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6913e-01 (9.9877e-01)\n",
      "Epoch: [4][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0821e+00 (9.9912e-01)\n",
      "Epoch: [4][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7043e-01 (9.9919e-01)\n",
      "Epoch: [4][3900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0264e+00 (9.9934e-01)\n",
      "Epoch: [4][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4924e-01 (9.9938e-01)\n",
      "Epoch: [4][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8428e-01 (9.9972e-01)\n",
      "Epoch: [4][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0818e+00 (1.0001e+00)\n",
      "Epoch: [4][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8150e-01 (1.0002e+00)\n",
      "Epoch: [4][4150/5005]\tTime  0.561 ( 0.560)\tData  0.001 ( 0.001)\tLoss 1.1305e+00 (1.0004e+00)\n",
      "Epoch: [4][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0738e+00 (1.0004e+00)\n",
      "Epoch: [4][4250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0705e+00 (1.0006e+00)\n",
      "Epoch: [4][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0143e+00 (1.0009e+00)\n",
      "Epoch: [4][4350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8359e-01 (1.0010e+00)\n",
      "Epoch: [4][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3529e-01 (1.0009e+00)\n",
      "Epoch: [4][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1203e+00 (1.0012e+00)\n",
      "Epoch: [4][4500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0881e+00 (1.0013e+00)\n",
      "Epoch: [4][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0247e+00 (1.0014e+00)\n",
      "Epoch: [4][4600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4210e-01 (1.0016e+00)\n",
      "Epoch: [4][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0948e+00 (1.0019e+00)\n",
      "Epoch: [4][4700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1987e+00 (1.0022e+00)\n",
      "Epoch: [4][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0544e+00 (1.0024e+00)\n",
      "Epoch: [4][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1507e+00 (1.0027e+00)\n",
      "Epoch: [4][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0856e+00 (1.0029e+00)\n",
      "Epoch: [4][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3030e+00 (1.0032e+00)\n",
      "Epoch: [4][4950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7156e-01 (1.0032e+00)\n",
      "Epoch: [4][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0191e+00 (1.0035e+00)\n",
      "Test: [  0/196]\tTime  3.364 ( 3.364)\tLoss 6.9766e-01 (6.9766e-01)\tAcc@1  79.30 ( 79.30)\tAcc@5  95.70 ( 95.70)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.436)\tLoss 5.7402e-01 (8.0955e-01)\tAcc@1  84.77 ( 77.95)\tAcc@5  96.88 ( 94.65)\n",
      "Test: [100/196]\tTime  0.378 ( 0.407)\tLoss 1.5520e+00 (9.5172e-01)\tAcc@1  59.77 ( 74.98)\tAcc@5  85.55 ( 93.01)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.1772e+00 (1.0900e+00)\tAcc@1  73.05 ( 72.41)\tAcc@5  88.28 ( 91.14)\n",
      "epoch 4 1.003476388191995 71.47799682617188 0.0075000000000000015 4698510 0.20040132214688156\n",
      "Epoch: [5][   0/5005]\tTime  3.061 ( 3.061)\tData  2.501 ( 2.501)\tLoss 9.0186e-01 (9.0186e-01)\n",
      "Epoch: [5][  50/5005]\tTime  0.559 ( 0.608)\tData  0.000 ( 0.049)\tLoss 1.0035e+00 (9.6595e-01)\n",
      "Epoch: [5][ 100/5005]\tTime  0.559 ( 0.584)\tData  0.000 ( 0.025)\tLoss 8.2447e-01 (9.7006e-01)\n",
      "Epoch: [5][ 150/5005]\tTime  0.559 ( 0.576)\tData  0.000 ( 0.017)\tLoss 8.0145e-01 (9.6784e-01)\n",
      "Epoch: [5][ 200/5005]\tTime  0.559 ( 0.571)\tData  0.000 ( 0.013)\tLoss 9.4430e-01 (9.6554e-01)\n",
      "Epoch: [5][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.010)\tLoss 1.0220e+00 (9.6744e-01)\n",
      "Epoch: [5][ 300/5005]\tTime  0.561 ( 0.567)\tData  0.000 ( 0.009)\tLoss 1.0641e+00 (9.6761e-01)\n",
      "Epoch: [5][ 350/5005]\tTime  0.558 ( 0.567)\tData  0.000 ( 0.007)\tLoss 9.0414e-01 (9.6732e-01)\n",
      "Epoch: [5][ 400/5005]\tTime  0.562 ( 0.566)\tData  0.000 ( 0.007)\tLoss 9.7957e-01 (9.6844e-01)\n",
      "Epoch: [5][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.0268e+00 (9.6852e-01)\n",
      "Epoch: [5][ 500/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.005)\tLoss 1.1313e+00 (9.6920e-01)\n",
      "Epoch: [5][ 550/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.1025e+00 (9.7049e-01)\n",
      "Epoch: [5][ 600/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.004)\tLoss 1.0181e+00 (9.7255e-01)\n",
      "Epoch: [5][ 650/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.6609e-01 (9.7480e-01)\n",
      "Epoch: [5][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0439e+00 (9.7331e-01)\n",
      "Epoch: [5][ 750/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.3937e-01 (9.7403e-01)\n",
      "Epoch: [5][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.8756e-01 (9.7326e-01)\n",
      "Epoch: [5][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0168e+00 (9.7337e-01)\n",
      "Epoch: [5][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0816e+00 (9.7477e-01)\n",
      "Epoch: [5][ 950/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.5666e-01 (9.7471e-01)\n",
      "Epoch: [5][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.2618e-01 (9.7380e-01)\n",
      "Epoch: [5][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1362e+00 (9.7470e-01)\n",
      "Epoch: [5][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.6185e-01 (9.7363e-01)\n",
      "Epoch: [5][1150/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.4382e-01 (9.7398e-01)\n",
      "Epoch: [5][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.7756e-01 (9.7371e-01)\n",
      "Epoch: [5][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0355e+00 (9.7341e-01)\n",
      "Epoch: [5][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0274e+00 (9.7359e-01)\n",
      "Epoch: [5][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0323e+00 (9.7389e-01)\n",
      "Epoch: [5][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5936e-01 (9.7304e-01)\n",
      "Epoch: [5][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0863e+00 (9.7403e-01)\n",
      "Epoch: [5][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2008e+00 (9.7466e-01)\n",
      "Epoch: [5][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.6473e-01 (9.7466e-01)\n",
      "Epoch: [5][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.7175e-01 (9.7484e-01)\n",
      "Epoch: [5][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.6363e-01 (9.7485e-01)\n",
      "Epoch: [5][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.3468e-01 (9.7478e-01)\n",
      "Epoch: [5][1750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0993e+00 (9.7520e-01)\n",
      "Epoch: [5][1800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.1234e-01 (9.7502e-01)\n",
      "Epoch: [5][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0108e+00 (9.7472e-01)\n",
      "Epoch: [5][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0779e+00 (9.7493e-01)\n",
      "Epoch: [5][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.7693e-01 (9.7506e-01)\n",
      "Epoch: [5][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1597e-01 (9.7510e-01)\n",
      "Epoch: [5][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9709e-01 (9.7508e-01)\n",
      "Epoch: [5][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4313e-01 (9.7526e-01)\n",
      "Epoch: [5][2150/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6144e-01 (9.7553e-01)\n",
      "Epoch: [5][2200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0238e+00 (9.7597e-01)\n",
      "Epoch: [5][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0589e+00 (9.7645e-01)\n",
      "Epoch: [5][2300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9484e-01 (9.7674e-01)\n",
      "Epoch: [5][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0574e+00 (9.7705e-01)\n",
      "Epoch: [5][2400/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9919e-01 (9.7799e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [5][2450/5005]\tTime  0.561 ( 0.560)\tData  0.001 ( 0.001)\tLoss 8.9964e-01 (9.7833e-01)\n",
      "Epoch: [5][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1006e+00 (9.7864e-01)\n",
      "Epoch: [5][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3472e-01 (9.7883e-01)\n",
      "Epoch: [5][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7198e-01 (9.7933e-01)\n",
      "Epoch: [5][2650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7519e-01 (9.7966e-01)\n",
      "Epoch: [5][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0780e+00 (9.8021e-01)\n",
      "Epoch: [5][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2796e-01 (9.8067e-01)\n",
      "Epoch: [5][2800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1164e+00 (9.8083e-01)\n",
      "Epoch: [5][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8341e-01 (9.8102e-01)\n",
      "Epoch: [5][2900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1017e+00 (9.8109e-01)\n",
      "Epoch: [5][2950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0018e-01 (9.8134e-01)\n",
      "Epoch: [5][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0624e-01 (9.8156e-01)\n",
      "Epoch: [5][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1865e+00 (9.8169e-01)\n",
      "Epoch: [5][3100/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2683e-01 (9.8142e-01)\n",
      "Epoch: [5][3150/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0039e+00 (9.8149e-01)\n",
      "Epoch: [5][3200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0509e+00 (9.8179e-01)\n",
      "Epoch: [5][3250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0442e+00 (9.8217e-01)\n",
      "Epoch: [5][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6863e-01 (9.8237e-01)\n",
      "Epoch: [5][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7821e-01 (9.8225e-01)\n",
      "Epoch: [5][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0930e+00 (9.8260e-01)\n",
      "Epoch: [5][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6013e-01 (9.8244e-01)\n",
      "Epoch: [5][3500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3199e-01 (9.8271e-01)\n",
      "Epoch: [5][3550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0541e+00 (9.8304e-01)\n",
      "Epoch: [5][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0846e+00 (9.8320e-01)\n",
      "Epoch: [5][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8996e-01 (9.8351e-01)\n",
      "Epoch: [5][3700/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0825e+00 (9.8365e-01)\n",
      "Epoch: [5][3750/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6618e-01 (9.8350e-01)\n",
      "Epoch: [5][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0808e-01 (9.8364e-01)\n",
      "Epoch: [5][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2859e-01 (9.8360e-01)\n",
      "Epoch: [5][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5656e-01 (9.8372e-01)\n",
      "Epoch: [5][3950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0127e+00 (9.8386e-01)\n",
      "Epoch: [5][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4107e-01 (9.8434e-01)\n",
      "Epoch: [5][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0102e+00 (9.8451e-01)\n",
      "Epoch: [5][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0218e+00 (9.8483e-01)\n",
      "Epoch: [5][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1292e-01 (9.8506e-01)\n",
      "Epoch: [5][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0339e+00 (9.8518e-01)\n",
      "Epoch: [5][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1270e+00 (9.8537e-01)\n",
      "Epoch: [5][4300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9365e-01 (9.8537e-01)\n",
      "Epoch: [5][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0696e+00 (9.8573e-01)\n",
      "Epoch: [5][4400/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7634e-01 (9.8567e-01)\n",
      "Epoch: [5][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1476e-01 (9.8600e-01)\n",
      "Epoch: [5][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1973e+00 (9.8628e-01)\n",
      "Epoch: [5][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5626e-01 (9.8664e-01)\n",
      "Epoch: [5][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9264e-01 (9.8672e-01)\n",
      "Epoch: [5][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0352e+00 (9.8677e-01)\n",
      "Epoch: [5][4700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8082e-01 (9.8666e-01)\n",
      "Epoch: [5][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0522e+00 (9.8701e-01)\n",
      "Epoch: [5][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1124e+00 (9.8705e-01)\n",
      "Epoch: [5][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0834e+00 (9.8744e-01)\n",
      "Epoch: [5][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0515e+00 (9.8728e-01)\n",
      "Epoch: [5][4950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4145e-01 (9.8751e-01)\n",
      "Epoch: [5][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9846e-01 (9.8731e-01)\n",
      "Test: [  0/196]\tTime  3.277 ( 3.277)\tLoss 5.4650e-01 (5.4650e-01)\tAcc@1  85.55 ( 85.55)\tAcc@5  97.66 ( 97.66)\n",
      "Test: [ 50/196]\tTime  0.378 ( 0.434)\tLoss 5.2552e-01 (8.1381e-01)\tAcc@1  87.11 ( 78.36)\tAcc@5  96.88 ( 94.43)\n",
      "Test: [100/196]\tTime  0.378 ( 0.406)\tLoss 1.3771e+00 (9.4360e-01)\tAcc@1  62.89 ( 75.43)\tAcc@5  88.67 ( 93.13)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 1.1138e+00 (1.0714e+00)\tAcc@1  74.61 ( 72.91)\tAcc@5  89.84 ( 91.46)\n",
      "epoch 5 0.9873272186192203 71.97200012207031 0.007000000000000001 4698510 0.20040132214688156\n",
      "Epoch: [6][   0/5005]\tTime  3.375 ( 3.375)\tData  2.811 ( 2.811)\tLoss 9.1627e-01 (9.1627e-01)\n",
      "Epoch: [6][  50/5005]\tTime  0.559 ( 0.614)\tData  0.000 ( 0.055)\tLoss 1.0413e+00 (9.8653e-01)\n",
      "Epoch: [6][ 100/5005]\tTime  0.561 ( 0.587)\tData  0.000 ( 0.028)\tLoss 8.4657e-01 (9.6926e-01)\n",
      "Epoch: [6][ 150/5005]\tTime  0.560 ( 0.578)\tData  0.000 ( 0.019)\tLoss 8.3990e-01 (9.6043e-01)\n",
      "Epoch: [6][ 200/5005]\tTime  0.558 ( 0.573)\tData  0.000 ( 0.014)\tLoss 8.3116e-01 (9.6066e-01)\n",
      "Epoch: [6][ 250/5005]\tTime  0.559 ( 0.570)\tData  0.000 ( 0.011)\tLoss 1.0690e+00 (9.5847e-01)\n",
      "Epoch: [6][ 300/5005]\tTime  0.559 ( 0.568)\tData  0.000 ( 0.010)\tLoss 9.2476e-01 (9.5829e-01)\n",
      "Epoch: [6][ 350/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 1.0010e+00 (9.5713e-01)\n",
      "Epoch: [6][ 400/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.0066e+00 (9.5759e-01)\n",
      "Epoch: [6][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 8.3068e-01 (9.5493e-01)\n",
      "Epoch: [6][ 500/5005]\tTime  0.561 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.1891e+00 (9.5437e-01)\n",
      "Epoch: [6][ 550/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0663e+00 (9.5640e-01)\n",
      "Epoch: [6][ 600/5005]\tTime  0.561 ( 0.564)\tData  0.000 ( 0.005)\tLoss 7.7268e-01 (9.5591e-01)\n",
      "Epoch: [6][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.005)\tLoss 1.0398e+00 (9.5676e-01)\n",
      "Epoch: [6][ 700/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.8940e-01 (9.5575e-01)\n",
      "Epoch: [6][ 750/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1393e+00 (9.5667e-01)\n",
      "Epoch: [6][ 800/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.4996e-01 (9.5569e-01)\n",
      "Epoch: [6][ 850/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.0346e+00 (9.5560e-01)\n",
      "Epoch: [6][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.5562e-01 (9.5508e-01)\n",
      "Epoch: [6][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.9027e-01 (9.5581e-01)\n",
      "Epoch: [6][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0127e+00 (9.5693e-01)\n",
      "Epoch: [6][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.8113e-01 (9.5645e-01)\n",
      "Epoch: [6][1100/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0167e+00 (9.5705e-01)\n",
      "Epoch: [6][1150/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.2187e-01 (9.5627e-01)\n",
      "Epoch: [6][1200/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 7.5841e-01 (9.5625e-01)\n",
      "Epoch: [6][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8648e-01 (9.5653e-01)\n",
      "Epoch: [6][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.4548e-01 (9.5592e-01)\n",
      "Epoch: [6][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.4098e-01 (9.5631e-01)\n",
      "Epoch: [6][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.3719e-01 (9.5636e-01)\n",
      "Epoch: [6][1450/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1653e+00 (9.5726e-01)\n",
      "Epoch: [6][1500/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.4587e-01 (9.5735e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [6][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1130e+00 (9.5739e-01)\n",
      "Epoch: [6][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.6129e-01 (9.5621e-01)\n",
      "Epoch: [6][1650/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.8287e-01 (9.5589e-01)\n",
      "Epoch: [6][1700/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.0073e-01 (9.5544e-01)\n",
      "Epoch: [6][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.2898e-01 (9.5547e-01)\n",
      "Epoch: [6][1800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.0552e-01 (9.5542e-01)\n",
      "Epoch: [6][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.4614e-01 (9.5645e-01)\n",
      "Epoch: [6][1900/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0975e+00 (9.5680e-01)\n",
      "Epoch: [6][1950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0230e+00 (9.5684e-01)\n",
      "Epoch: [6][2000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0198e+00 (9.5662e-01)\n",
      "Epoch: [6][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0243e+00 (9.5635e-01)\n",
      "Epoch: [6][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.7059e-01 (9.5677e-01)\n",
      "Epoch: [6][2150/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1289e+00 (9.5695e-01)\n",
      "Epoch: [6][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0138e+00 (9.5727e-01)\n",
      "Epoch: [6][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1967e-01 (9.5779e-01)\n",
      "Epoch: [6][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7993e-01 (9.5806e-01)\n",
      "Epoch: [6][2350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0232e+00 (9.5838e-01)\n",
      "Epoch: [6][2400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7854e-01 (9.5883e-01)\n",
      "Epoch: [6][2450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3481e-01 (9.5896e-01)\n",
      "Epoch: [6][2500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8220e-01 (9.5954e-01)\n",
      "Epoch: [6][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5149e-01 (9.5988e-01)\n",
      "Epoch: [6][2600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4856e-01 (9.6017e-01)\n",
      "Epoch: [6][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9350e-01 (9.6076e-01)\n",
      "Epoch: [6][2700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0612e+00 (9.6081e-01)\n",
      "Epoch: [6][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6497e-01 (9.6052e-01)\n",
      "Epoch: [6][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5443e-01 (9.6023e-01)\n",
      "Epoch: [6][2850/5005]\tTime  0.559 ( 0.560)\tData  0.001 ( 0.001)\tLoss 1.0557e+00 (9.6077e-01)\n",
      "Epoch: [6][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8146e-01 (9.6121e-01)\n",
      "Epoch: [6][2950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3416e-01 (9.6139e-01)\n",
      "Epoch: [6][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8075e-01 (9.6165e-01)\n",
      "Epoch: [6][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1605e+00 (9.6204e-01)\n",
      "Epoch: [6][3100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0332e+00 (9.6201e-01)\n",
      "Epoch: [6][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8107e-01 (9.6209e-01)\n",
      "Epoch: [6][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3926e-01 (9.6258e-01)\n",
      "Epoch: [6][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0414e+00 (9.6245e-01)\n",
      "Epoch: [6][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7118e-01 (9.6292e-01)\n",
      "Epoch: [6][3350/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0596e+00 (9.6333e-01)\n",
      "Epoch: [6][3400/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0889e+00 (9.6340e-01)\n",
      "Epoch: [6][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5807e-01 (9.6321e-01)\n",
      "Epoch: [6][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6899e-01 (9.6305e-01)\n",
      "Epoch: [6][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1325e+00 (9.6326e-01)\n",
      "Epoch: [6][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3308e-01 (9.6331e-01)\n",
      "Epoch: [6][3650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0208e+00 (9.6386e-01)\n",
      "Epoch: [6][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6234e-01 (9.6412e-01)\n",
      "Epoch: [6][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5521e-01 (9.6437e-01)\n",
      "Epoch: [6][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1297e+00 (9.6465e-01)\n",
      "Epoch: [6][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8102e-01 (9.6480e-01)\n",
      "Epoch: [6][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0404e+00 (9.6492e-01)\n",
      "Epoch: [6][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1991e-01 (9.6465e-01)\n",
      "Epoch: [6][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1113e+00 (9.6486e-01)\n",
      "Epoch: [6][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0111e+00 (9.6499e-01)\n",
      "Epoch: [6][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0715e+00 (9.6536e-01)\n",
      "Epoch: [6][4150/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1717e+00 (9.6554e-01)\n",
      "Epoch: [6][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0195e+00 (9.6590e-01)\n",
      "Epoch: [6][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0612e+00 (9.6599e-01)\n",
      "Epoch: [6][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5197e-01 (9.6609e-01)\n",
      "Epoch: [6][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0093e+00 (9.6638e-01)\n",
      "Epoch: [6][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8699e-01 (9.6664e-01)\n",
      "Epoch: [6][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0538e+00 (9.6681e-01)\n",
      "Epoch: [6][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5941e-01 (9.6700e-01)\n",
      "Epoch: [6][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9438e-01 (9.6727e-01)\n",
      "Epoch: [6][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0722e+00 (9.6735e-01)\n",
      "Epoch: [6][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1904e-01 (9.6754e-01)\n",
      "Epoch: [6][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0733e+00 (9.6745e-01)\n",
      "Epoch: [6][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0172e+00 (9.6753e-01)\n",
      "Epoch: [6][4800/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1530e-01 (9.6795e-01)\n",
      "Epoch: [6][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8710e-01 (9.6829e-01)\n",
      "Epoch: [6][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7904e-01 (9.6828e-01)\n",
      "Epoch: [6][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4731e-01 (9.6829e-01)\n",
      "Epoch: [6][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3711e-01 (9.6834e-01)\n",
      "Test: [  0/196]\tTime  3.235 ( 3.235)\tLoss 6.0208e-01 (6.0208e-01)\tAcc@1  83.98 ( 83.98)\tAcc@5  95.70 ( 95.70)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.433)\tLoss 5.2541e-01 (8.1947e-01)\tAcc@1  86.72 ( 77.96)\tAcc@5  96.88 ( 94.66)\n",
      "Test: [100/196]\tTime  0.377 ( 0.406)\tLoss 1.5233e+00 (9.5941e-01)\tAcc@1  56.64 ( 74.98)\tAcc@5  87.50 ( 93.12)\n",
      "Test: [150/196]\tTime  0.378 ( 0.396)\tLoss 1.0625e+00 (1.0783e+00)\tAcc@1  76.56 ( 72.68)\tAcc@5  89.84 ( 91.44)\n",
      "epoch 6 0.9683912082928609 71.83599853515625 0.006500000000000002 4698510 0.20040132214688156\n",
      "Epoch: [7][   0/5005]\tTime  3.282 ( 3.282)\tData  2.717 ( 2.717)\tLoss 9.1550e-01 (9.1550e-01)\n",
      "Epoch: [7][  50/5005]\tTime  0.559 ( 0.612)\tData  0.000 ( 0.053)\tLoss 9.2035e-01 (9.4285e-01)\n",
      "Epoch: [7][ 100/5005]\tTime  0.559 ( 0.586)\tData  0.000 ( 0.027)\tLoss 9.6225e-01 (9.4379e-01)\n",
      "Epoch: [7][ 150/5005]\tTime  0.558 ( 0.577)\tData  0.000 ( 0.018)\tLoss 9.7545e-01 (9.4101e-01)\n",
      "Epoch: [7][ 200/5005]\tTime  0.559 ( 0.572)\tData  0.000 ( 0.014)\tLoss 9.1601e-01 (9.3855e-01)\n",
      "Epoch: [7][ 250/5005]\tTime  0.558 ( 0.570)\tData  0.000 ( 0.011)\tLoss 9.8116e-01 (9.3796e-01)\n",
      "Epoch: [7][ 300/5005]\tTime  0.561 ( 0.568)\tData  0.000 ( 0.009)\tLoss 9.2239e-01 (9.3542e-01)\n",
      "Epoch: [7][ 350/5005]\tTime  0.558 ( 0.567)\tData  0.000 ( 0.008)\tLoss 9.0606e-01 (9.3531e-01)\n",
      "Epoch: [7][ 400/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.007)\tLoss 8.7604e-01 (9.3386e-01)\n",
      "Epoch: [7][ 450/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.006)\tLoss 9.7984e-01 (9.3156e-01)\n",
      "Epoch: [7][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.006)\tLoss 9.4077e-01 (9.3485e-01)\n",
      "Epoch: [7][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 8.9932e-01 (9.3485e-01)\n",
      "Epoch: [7][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.005)\tLoss 1.1635e+00 (9.3295e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [7][ 650/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 7.8760e-01 (9.3351e-01)\n",
      "Epoch: [7][ 700/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0247e+00 (9.3512e-01)\n",
      "Epoch: [7][ 750/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.004)\tLoss 8.5250e-01 (9.3451e-01)\n",
      "Epoch: [7][ 800/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 8.3944e-01 (9.3545e-01)\n",
      "Epoch: [7][ 850/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.4175e-01 (9.3368e-01)\n",
      "Epoch: [7][ 900/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.7891e-01 (9.3303e-01)\n",
      "Epoch: [7][ 950/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.3470e-01 (9.3509e-01)\n",
      "Epoch: [7][1000/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.3214e-01 (9.3703e-01)\n",
      "Epoch: [7][1050/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.2922e-01 (9.3723e-01)\n",
      "Epoch: [7][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.7858e-01 (9.3821e-01)\n",
      "Epoch: [7][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.0284e+00 (9.3896e-01)\n",
      "Epoch: [7][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.6725e-01 (9.3863e-01)\n",
      "Epoch: [7][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.4225e-01 (9.3805e-01)\n",
      "Epoch: [7][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5477e-01 (9.3866e-01)\n",
      "Epoch: [7][1350/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.7804e-01 (9.3810e-01)\n",
      "Epoch: [7][1400/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.6275e-01 (9.3862e-01)\n",
      "Epoch: [7][1450/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.0817e-01 (9.3890e-01)\n",
      "Epoch: [7][1500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.5963e-01 (9.3851e-01)\n",
      "Epoch: [7][1550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6456e-01 (9.3905e-01)\n",
      "Epoch: [7][1600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1363e+00 (9.3929e-01)\n",
      "Epoch: [7][1650/5005]\tTime  0.564 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.3711e-01 (9.3971e-01)\n",
      "Epoch: [7][1700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.4424e-01 (9.3980e-01)\n",
      "Epoch: [7][1750/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1217e+00 (9.4009e-01)\n",
      "Epoch: [7][1800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.6491e-01 (9.4010e-01)\n",
      "Epoch: [7][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.4809e-01 (9.4019e-01)\n",
      "Epoch: [7][1900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6971e-01 (9.4023e-01)\n",
      "Epoch: [7][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1120e+00 (9.4059e-01)\n",
      "Epoch: [7][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.6815e-01 (9.4116e-01)\n",
      "Epoch: [7][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.5808e-01 (9.4201e-01)\n",
      "Epoch: [7][2100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.6130e-01 (9.4183e-01)\n",
      "Epoch: [7][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6579e-01 (9.4203e-01)\n",
      "Epoch: [7][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3115e-01 (9.4160e-01)\n",
      "Epoch: [7][2250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0005e+00 (9.4166e-01)\n",
      "Epoch: [7][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9846e-01 (9.4140e-01)\n",
      "Epoch: [7][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4796e-01 (9.4134e-01)\n",
      "Epoch: [7][2400/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1157e+00 (9.4176e-01)\n",
      "Epoch: [7][2450/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1415e+00 (9.4199e-01)\n",
      "Epoch: [7][2500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7451e-01 (9.4183e-01)\n",
      "Epoch: [7][2550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4283e-01 (9.4213e-01)\n",
      "Epoch: [7][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1644e-01 (9.4260e-01)\n",
      "Epoch: [7][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0243e+00 (9.4300e-01)\n",
      "Epoch: [7][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0052e+00 (9.4315e-01)\n",
      "Epoch: [7][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8235e-01 (9.4336e-01)\n",
      "Epoch: [7][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8157e-01 (9.4359e-01)\n",
      "Epoch: [7][2850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1441e-01 (9.4372e-01)\n",
      "Epoch: [7][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0833e-01 (9.4419e-01)\n",
      "Epoch: [7][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0019e+00 (9.4432e-01)\n",
      "Epoch: [7][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4407e-01 (9.4482e-01)\n",
      "Epoch: [7][3050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0421e-01 (9.4487e-01)\n",
      "Epoch: [7][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0217e+00 (9.4545e-01)\n",
      "Epoch: [7][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6890e-01 (9.4573e-01)\n",
      "Epoch: [7][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5647e-01 (9.4594e-01)\n",
      "Epoch: [7][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4102e-01 (9.4621e-01)\n",
      "Epoch: [7][3300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4686e-01 (9.4665e-01)\n",
      "Epoch: [7][3350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9921e-01 (9.4688e-01)\n",
      "Epoch: [7][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9452e-01 (9.4679e-01)\n",
      "Epoch: [7][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0303e+00 (9.4721e-01)\n",
      "Epoch: [7][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9276e-01 (9.4693e-01)\n",
      "Epoch: [7][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1254e+00 (9.4742e-01)\n",
      "Epoch: [7][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8444e-01 (9.4781e-01)\n",
      "Epoch: [7][3650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1161e+00 (9.4759e-01)\n",
      "Epoch: [7][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2178e-01 (9.4801e-01)\n",
      "Epoch: [7][3750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2046e+00 (9.4837e-01)\n",
      "Epoch: [7][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8202e-01 (9.4840e-01)\n",
      "Epoch: [7][3850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.9539e-01 (9.4864e-01)\n",
      "Epoch: [7][3900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2980e-01 (9.4864e-01)\n",
      "Epoch: [7][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3388e-01 (9.4895e-01)\n",
      "Epoch: [7][4000/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.4627e-01 (9.4910e-01)\n",
      "Epoch: [7][4050/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.6441e-01 (9.4936e-01)\n",
      "Epoch: [7][4100/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.7769e-01 (9.4944e-01)\n",
      "Epoch: [7][4150/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.6426e-01 (9.4956e-01)\n",
      "Epoch: [7][4200/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.9563e-01 (9.4985e-01)\n",
      "Epoch: [7][4250/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.8880e-01 (9.4998e-01)\n",
      "Epoch: [7][4300/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0479e+00 (9.5011e-01)\n",
      "Epoch: [7][4350/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.1704e-01 (9.4999e-01)\n",
      "Epoch: [7][4400/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0459e+00 (9.5025e-01)\n",
      "Epoch: [7][4450/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.1962e-01 (9.5027e-01)\n",
      "Epoch: [7][4500/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.8506e-01 (9.5046e-01)\n",
      "Epoch: [7][4550/5005]\tTime  0.561 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.0331e-01 (9.5063e-01)\n",
      "Epoch: [7][4600/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.4142e-01 (9.5080e-01)\n",
      "Epoch: [7][4650/5005]\tTime  0.562 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.0302e-01 (9.5096e-01)\n",
      "Epoch: [7][4700/5005]\tTime  0.561 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0181e+00 (9.5133e-01)\n",
      "Epoch: [7][4750/5005]\tTime  0.561 ( 0.559)\tData  0.000 ( 0.001)\tLoss 6.6103e-01 (9.5140e-01)\n",
      "Epoch: [7][4800/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1778e+00 (9.5159e-01)\n",
      "Epoch: [7][4850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0963e+00 (9.5140e-01)\n",
      "Epoch: [7][4900/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.4058e-01 (9.5137e-01)\n",
      "Epoch: [7][4950/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.5194e-01 (9.5140e-01)\n",
      "Epoch: [7][5000/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0067e+00 (9.5148e-01)\n",
      "Test: [  0/196]\tTime  3.333 ( 3.333)\tLoss 6.5791e-01 (6.5791e-01)\tAcc@1  82.81 ( 82.81)\tAcc@5  96.48 ( 96.48)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 4.8007e-01 (8.0124e-01)\tAcc@1  87.89 ( 78.72)\tAcc@5  97.27 ( 94.75)\n",
      "Test: [100/196]\tTime  0.377 ( 0.407)\tLoss 1.3399e+00 (9.2762e-01)\tAcc@1  61.33 ( 75.72)\tAcc@5  89.06 ( 93.37)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.2720e+00 (1.0487e+00)\tAcc@1  71.48 ( 73.46)\tAcc@5  89.06 ( 91.73)\n",
      "epoch 7 0.9515117987812275 72.51200103759766 0.006000000000000002 4698510 0.20040132214688156\n",
      "Epoch: [8][   0/5005]\tTime  3.208 ( 3.208)\tData  2.648 ( 2.648)\tLoss 8.7304e-01 (8.7304e-01)\n",
      "Epoch: [8][  50/5005]\tTime  0.559 ( 0.611)\tData  0.000 ( 0.052)\tLoss 9.5669e-01 (9.1540e-01)\n",
      "Epoch: [8][ 100/5005]\tTime  0.560 ( 0.585)\tData  0.000 ( 0.026)\tLoss 9.7956e-01 (9.1327e-01)\n",
      "Epoch: [8][ 150/5005]\tTime  0.558 ( 0.577)\tData  0.000 ( 0.018)\tLoss 9.0067e-01 (9.1244e-01)\n",
      "Epoch: [8][ 200/5005]\tTime  0.558 ( 0.572)\tData  0.000 ( 0.013)\tLoss 8.9880e-01 (9.1479e-01)\n",
      "Epoch: [8][ 250/5005]\tTime  0.560 ( 0.570)\tData  0.000 ( 0.011)\tLoss 8.8328e-01 (9.1931e-01)\n",
      "Epoch: [8][ 300/5005]\tTime  0.560 ( 0.568)\tData  0.001 ( 0.009)\tLoss 9.2000e-01 (9.1757e-01)\n",
      "Epoch: [8][ 350/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 8.1706e-01 (9.1765e-01)\n",
      "Epoch: [8][ 400/5005]\tTime  0.561 ( 0.566)\tData  0.000 ( 0.007)\tLoss 9.6739e-01 (9.1665e-01)\n",
      "Epoch: [8][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 9.5556e-01 (9.1641e-01)\n",
      "Epoch: [8][ 500/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 9.7526e-01 (9.1656e-01)\n",
      "Epoch: [8][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.2352e-01 (9.1550e-01)\n",
      "Epoch: [8][ 600/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0069e+00 (9.1337e-01)\n",
      "Epoch: [8][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 7.8275e-01 (9.1593e-01)\n",
      "Epoch: [8][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.6058e-01 (9.1615e-01)\n",
      "Epoch: [8][ 750/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.4839e-01 (9.1675e-01)\n",
      "Epoch: [8][ 800/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0941e+00 (9.1758e-01)\n",
      "Epoch: [8][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.3714e-01 (9.1825e-01)\n",
      "Epoch: [8][ 900/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.3176e-01 (9.1797e-01)\n",
      "Epoch: [8][ 950/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.1625e-01 (9.1918e-01)\n",
      "Epoch: [8][1000/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.8553e-01 (9.2041e-01)\n",
      "Epoch: [8][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.9645e-01 (9.1972e-01)\n",
      "Epoch: [8][1100/5005]\tTime  0.562 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.4830e-01 (9.2043e-01)\n",
      "Epoch: [8][1150/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0412e+00 (9.2139e-01)\n",
      "Epoch: [8][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.8400e-01 (9.2078e-01)\n",
      "Epoch: [8][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.2581e-01 (9.2065e-01)\n",
      "Epoch: [8][1300/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1949e-01 (9.2064e-01)\n",
      "Epoch: [8][1350/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1266e-01 (9.1986e-01)\n",
      "Epoch: [8][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5607e-01 (9.2024e-01)\n",
      "Epoch: [8][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.4954e-01 (9.2009e-01)\n",
      "Epoch: [8][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1478e-01 (9.2023e-01)\n",
      "Epoch: [8][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1490e+00 (9.2087e-01)\n",
      "Epoch: [8][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.5386e-01 (9.2140e-01)\n",
      "Epoch: [8][1650/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1723e-01 (9.2207e-01)\n",
      "Epoch: [8][1700/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.9076e-01 (9.2319e-01)\n",
      "Epoch: [8][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.4156e-01 (9.2321e-01)\n",
      "Epoch: [8][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.1049e-01 (9.2346e-01)\n",
      "Epoch: [8][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.2740e-01 (9.2428e-01)\n",
      "Epoch: [8][1900/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0449e+00 (9.2447e-01)\n",
      "Epoch: [8][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6340e-01 (9.2502e-01)\n",
      "Epoch: [8][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0555e+00 (9.2525e-01)\n",
      "Epoch: [8][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.8563e-01 (9.2514e-01)\n",
      "Epoch: [8][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4769e-01 (9.2530e-01)\n",
      "Epoch: [8][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8228e-01 (9.2537e-01)\n",
      "Epoch: [8][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5911e-01 (9.2547e-01)\n",
      "Epoch: [8][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5837e-01 (9.2561e-01)\n",
      "Epoch: [8][2300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0083e+00 (9.2606e-01)\n",
      "Epoch: [8][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0118e+00 (9.2598e-01)\n",
      "Epoch: [8][2400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4799e-01 (9.2637e-01)\n",
      "Epoch: [8][2450/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9112e-01 (9.2659e-01)\n",
      "Epoch: [8][2500/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0087e+00 (9.2731e-01)\n",
      "Epoch: [8][2550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3739e-01 (9.2729e-01)\n",
      "Epoch: [8][2600/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0485e+00 (9.2745e-01)\n",
      "Epoch: [8][2650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5449e-01 (9.2755e-01)\n",
      "Epoch: [8][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1250e+00 (9.2786e-01)\n",
      "Epoch: [8][2750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4365e-01 (9.2825e-01)\n",
      "Epoch: [8][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6432e-01 (9.2838e-01)\n",
      "Epoch: [8][2850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0720e+00 (9.2850e-01)\n",
      "Epoch: [8][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1561e-01 (9.2848e-01)\n",
      "Epoch: [8][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8847e-01 (9.2910e-01)\n",
      "Epoch: [8][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3096e-01 (9.2927e-01)\n",
      "Epoch: [8][3050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0317e+00 (9.2959e-01)\n",
      "Epoch: [8][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6542e-01 (9.2987e-01)\n",
      "Epoch: [8][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1924e-01 (9.2989e-01)\n",
      "Epoch: [8][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1735e-01 (9.2994e-01)\n",
      "Epoch: [8][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9179e-01 (9.3030e-01)\n",
      "Epoch: [8][3300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0248e+00 (9.3030e-01)\n",
      "Epoch: [8][3350/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7319e-01 (9.3073e-01)\n",
      "Epoch: [8][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7780e-01 (9.3075e-01)\n",
      "Epoch: [8][3450/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8003e-01 (9.3114e-01)\n",
      "Epoch: [8][3500/5005]\tTime  0.561 ( 0.560)\tData  0.001 ( 0.001)\tLoss 1.0900e+00 (9.3126e-01)\n",
      "Epoch: [8][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9066e-01 (9.3145e-01)\n",
      "Epoch: [8][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0069e+00 (9.3158e-01)\n",
      "Epoch: [8][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6673e-01 (9.3170e-01)\n",
      "Epoch: [8][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5370e-01 (9.3190e-01)\n",
      "Epoch: [8][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0938e+00 (9.3199e-01)\n",
      "Epoch: [8][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8148e-01 (9.3199e-01)\n",
      "Epoch: [8][3850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0603e+00 (9.3270e-01)\n",
      "Epoch: [8][3900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0104e+00 (9.3288e-01)\n",
      "Epoch: [8][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2237e-01 (9.3315e-01)\n",
      "Epoch: [8][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0647e+00 (9.3335e-01)\n",
      "Epoch: [8][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1296e-01 (9.3352e-01)\n",
      "Epoch: [8][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9620e-01 (9.3366e-01)\n",
      "Epoch: [8][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9887e-01 (9.3389e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [8][4200/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0631e+00 (9.3401e-01)\n",
      "Epoch: [8][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3810e-01 (9.3406e-01)\n",
      "Epoch: [8][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3702e-01 (9.3434e-01)\n",
      "Epoch: [8][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8409e-01 (9.3438e-01)\n",
      "Epoch: [8][4400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9458e-01 (9.3452e-01)\n",
      "Epoch: [8][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0388e+00 (9.3466e-01)\n",
      "Epoch: [8][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6537e-01 (9.3459e-01)\n",
      "Epoch: [8][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7045e-01 (9.3490e-01)\n",
      "Epoch: [8][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9674e-01 (9.3506e-01)\n",
      "Epoch: [8][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6445e-01 (9.3499e-01)\n",
      "Epoch: [8][4700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9723e-01 (9.3529e-01)\n",
      "Epoch: [8][4750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1114e+00 (9.3548e-01)\n",
      "Epoch: [8][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0820e+00 (9.3550e-01)\n",
      "Epoch: [8][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5972e-01 (9.3556e-01)\n",
      "Epoch: [8][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6958e-01 (9.3564e-01)\n",
      "Epoch: [8][4950/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1552e-01 (9.3555e-01)\n",
      "Epoch: [8][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0042e+00 (9.3577e-01)\n",
      "Test: [  0/196]\tTime  3.306 ( 3.306)\tLoss 5.5977e-01 (5.5977e-01)\tAcc@1  84.38 ( 84.38)\tAcc@5  96.88 ( 96.88)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 4.5869e-01 (7.8467e-01)\tAcc@1  89.84 ( 78.88)\tAcc@5  97.27 ( 94.98)\n",
      "Test: [100/196]\tTime  0.377 ( 0.406)\tLoss 1.3182e+00 (9.1755e-01)\tAcc@1  62.11 ( 75.95)\tAcc@5  88.67 ( 93.41)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.2733e+00 (1.0389e+00)\tAcc@1  70.31 ( 73.53)\tAcc@5  87.89 ( 91.78)\n",
      "epoch 8 0.9357715407995425 72.45199584960938 0.005500000000000002 4698510 0.20040132214688156\n",
      "Epoch: [9][   0/5005]\tTime  3.235 ( 3.235)\tData  2.676 ( 2.676)\tLoss 7.4452e-01 (7.4452e-01)\n",
      "Epoch: [9][  50/5005]\tTime  0.558 ( 0.611)\tData  0.000 ( 0.053)\tLoss 9.0454e-01 (9.0332e-01)\n",
      "Epoch: [9][ 100/5005]\tTime  0.558 ( 0.585)\tData  0.000 ( 0.027)\tLoss 7.8634e-01 (8.9193e-01)\n",
      "Epoch: [9][ 150/5005]\tTime  0.559 ( 0.577)\tData  0.000 ( 0.018)\tLoss 7.8257e-01 (8.9188e-01)\n",
      "Epoch: [9][ 200/5005]\tTime  0.559 ( 0.572)\tData  0.000 ( 0.014)\tLoss 9.3183e-01 (8.8734e-01)\n",
      "Epoch: [9][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.011)\tLoss 8.7284e-01 (8.8931e-01)\n",
      "Epoch: [9][ 300/5005]\tTime  0.558 ( 0.568)\tData  0.000 ( 0.009)\tLoss 1.0327e+00 (8.9431e-01)\n",
      "Epoch: [9][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.008)\tLoss 7.9736e-01 (8.9822e-01)\n",
      "Epoch: [9][ 400/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.007)\tLoss 8.1066e-01 (8.9926e-01)\n",
      "Epoch: [9][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 8.9269e-01 (9.0044e-01)\n",
      "Epoch: [9][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.006)\tLoss 1.0318e+00 (9.0089e-01)\n",
      "Epoch: [9][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 8.8341e-01 (9.0140e-01)\n",
      "Epoch: [9][ 600/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.005)\tLoss 8.1079e-01 (9.0276e-01)\n",
      "Epoch: [9][ 650/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.9758e-01 (9.0161e-01)\n",
      "Epoch: [9][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0249e+00 (9.0268e-01)\n",
      "Epoch: [9][ 750/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.004)\tLoss 9.4078e-01 (9.0401e-01)\n",
      "Epoch: [9][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.004)\tLoss 9.9396e-01 (9.0525e-01)\n",
      "Epoch: [9][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0230e+00 (9.0557e-01)\n",
      "Epoch: [9][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0321e+00 (9.0644e-01)\n",
      "Epoch: [9][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0907e+00 (9.0538e-01)\n",
      "Epoch: [9][1000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.0767e+00 (9.0614e-01)\n",
      "Epoch: [9][1050/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.0092e+00 (9.0618e-01)\n",
      "Epoch: [9][1100/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.0047e+00 (9.0715e-01)\n",
      "Epoch: [9][1150/5005]\tTime  0.562 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.8506e-01 (9.0671e-01)\n",
      "Epoch: [9][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0600e+00 (9.0657e-01)\n",
      "Epoch: [9][1250/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.0833e-01 (9.0715e-01)\n",
      "Epoch: [9][1300/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.9407e-01 (9.0766e-01)\n",
      "Epoch: [9][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.5745e-01 (9.0804e-01)\n",
      "Epoch: [9][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.3758e-01 (9.0794e-01)\n",
      "Epoch: [9][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.5529e-01 (9.0805e-01)\n",
      "Epoch: [9][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.1919e-01 (9.0754e-01)\n",
      "Epoch: [9][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.8667e-01 (9.0732e-01)\n",
      "Epoch: [9][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.8735e-01 (9.0747e-01)\n",
      "Epoch: [9][1650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.1346e-01 (9.0729e-01)\n",
      "Epoch: [9][1700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.1990e-01 (9.0708e-01)\n",
      "Epoch: [9][1750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.6697e-01 (9.0760e-01)\n",
      "Epoch: [9][1800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.6543e-01 (9.0788e-01)\n",
      "Epoch: [9][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0139e+00 (9.0805e-01)\n",
      "Epoch: [9][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.2015e-01 (9.0794e-01)\n",
      "Epoch: [9][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.9515e-01 (9.0793e-01)\n",
      "Epoch: [9][2000/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6882e-01 (9.0807e-01)\n",
      "Epoch: [9][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0183e+00 (9.0820e-01)\n",
      "Epoch: [9][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7776e-01 (9.0862e-01)\n",
      "Epoch: [9][2150/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0699e+00 (9.0943e-01)\n",
      "Epoch: [9][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9184e-01 (9.0968e-01)\n",
      "Epoch: [9][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9720e-01 (9.0992e-01)\n",
      "Epoch: [9][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0725e-01 (9.0984e-01)\n",
      "Epoch: [9][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1230e+00 (9.1006e-01)\n",
      "Epoch: [9][2400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3688e-01 (9.1034e-01)\n",
      "Epoch: [9][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5757e-01 (9.1081e-01)\n",
      "Epoch: [9][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9189e-01 (9.1078e-01)\n",
      "Epoch: [9][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1368e-01 (9.1168e-01)\n",
      "Epoch: [9][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7020e-01 (9.1205e-01)\n",
      "Epoch: [9][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5252e-01 (9.1231e-01)\n",
      "Epoch: [9][2700/5005]\tTime  0.563 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1616e+00 (9.1233e-01)\n",
      "Epoch: [9][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2183e-01 (9.1232e-01)\n",
      "Epoch: [9][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6191e-01 (9.1264e-01)\n",
      "Epoch: [9][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7638e-01 (9.1297e-01)\n",
      "Epoch: [9][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0981e-01 (9.1339e-01)\n",
      "Epoch: [9][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8063e-01 (9.1368e-01)\n",
      "Epoch: [9][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0459e+00 (9.1394e-01)\n",
      "Epoch: [9][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3293e-01 (9.1452e-01)\n",
      "Epoch: [9][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8420e-01 (9.1449e-01)\n",
      "Epoch: [9][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3751e-01 (9.1497e-01)\n",
      "Epoch: [9][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2269e-01 (9.1485e-01)\n",
      "Epoch: [9][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8243e-01 (9.1475e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [9][3300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5172e-01 (9.1453e-01)\n",
      "Epoch: [9][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0276e+00 (9.1477e-01)\n",
      "Epoch: [9][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9390e-01 (9.1452e-01)\n",
      "Epoch: [9][3450/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0690e-01 (9.1446e-01)\n",
      "Epoch: [9][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0410e+00 (9.1502e-01)\n",
      "Epoch: [9][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2686e-01 (9.1511e-01)\n",
      "Epoch: [9][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0212e-01 (9.1546e-01)\n",
      "Epoch: [9][3650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1757e-01 (9.1578e-01)\n",
      "Epoch: [9][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7530e-01 (9.1573e-01)\n",
      "Epoch: [9][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3165e-01 (9.1557e-01)\n",
      "Epoch: [9][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8450e-01 (9.1573e-01)\n",
      "Epoch: [9][3850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0856e-01 (9.1596e-01)\n",
      "Epoch: [9][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6809e-01 (9.1647e-01)\n",
      "Epoch: [9][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4846e-01 (9.1668e-01)\n",
      "Epoch: [9][4000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0648e-01 (9.1664e-01)\n",
      "Epoch: [9][4050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6783e-01 (9.1681e-01)\n",
      "Epoch: [9][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0469e+00 (9.1704e-01)\n",
      "Epoch: [9][4150/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0000e+00 (9.1743e-01)\n",
      "Epoch: [9][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0260e+00 (9.1744e-01)\n",
      "Epoch: [9][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0732e-01 (9.1753e-01)\n",
      "Epoch: [9][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0323e+00 (9.1746e-01)\n",
      "Epoch: [9][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8417e-01 (9.1778e-01)\n",
      "Epoch: [9][4400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5089e-01 (9.1790e-01)\n",
      "Epoch: [9][4450/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.9132e-01 (9.1811e-01)\n",
      "Epoch: [9][4500/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.3210e-01 (9.1845e-01)\n",
      "Epoch: [9][4550/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.3623e-01 (9.1890e-01)\n",
      "Epoch: [9][4600/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0077e+00 (9.1916e-01)\n",
      "Epoch: [9][4650/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0422e+00 (9.1931e-01)\n",
      "Epoch: [9][4700/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.8269e-01 (9.1955e-01)\n",
      "Epoch: [9][4750/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.4396e-01 (9.1959e-01)\n",
      "Epoch: [9][4800/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0088e+00 (9.1981e-01)\n",
      "Epoch: [9][4850/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.0188e-01 (9.1999e-01)\n",
      "Epoch: [9][4900/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.9450e-01 (9.2018e-01)\n",
      "Epoch: [9][4950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.9251e-01 (9.2047e-01)\n",
      "Epoch: [9][5000/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0027e+00 (9.2064e-01)\n",
      "Test: [  0/196]\tTime  3.413 ( 3.413)\tLoss 6.0473e-01 (6.0473e-01)\tAcc@1  83.59 ( 83.59)\tAcc@5  96.88 ( 96.88)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.437)\tLoss 5.6250e-01 (7.8450e-01)\tAcc@1  85.55 ( 78.84)\tAcc@5  96.88 ( 94.98)\n",
      "Test: [100/196]\tTime  0.377 ( 0.407)\tLoss 1.4001e+00 (9.0993e-01)\tAcc@1  57.03 ( 76.04)\tAcc@5  87.50 ( 93.57)\n",
      "Test: [150/196]\tTime  0.378 ( 0.398)\tLoss 1.1315e+00 (1.0308e+00)\tAcc@1  75.00 ( 73.69)\tAcc@5  92.19 ( 91.87)\n",
      "epoch 9 0.9206965876634784 72.72599792480469 0.005000000000000002 4698510 0.20040132214688156\n",
      "Epoch: [10][   0/5005]\tTime  3.020 ( 3.020)\tData  2.454 ( 2.454)\tLoss 1.0610e+00 (1.0610e+00)\n",
      "Epoch: [10][  50/5005]\tTime  0.559 ( 0.607)\tData  0.000 ( 0.048)\tLoss 7.9579e-01 (8.8507e-01)\n",
      "Epoch: [10][ 100/5005]\tTime  0.558 ( 0.583)\tData  0.000 ( 0.025)\tLoss 1.0707e+00 (8.7737e-01)\n",
      "Epoch: [10][ 150/5005]\tTime  0.559 ( 0.575)\tData  0.000 ( 0.016)\tLoss 9.0714e-01 (8.8045e-01)\n",
      "Epoch: [10][ 200/5005]\tTime  0.559 ( 0.571)\tData  0.000 ( 0.012)\tLoss 9.6448e-01 (8.7781e-01)\n",
      "Epoch: [10][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.010)\tLoss 8.6970e-01 (8.7868e-01)\n",
      "Epoch: [10][ 300/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 1.0018e+00 (8.8097e-01)\n",
      "Epoch: [10][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 8.0233e-01 (8.8449e-01)\n",
      "Epoch: [10][ 400/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 7.8588e-01 (8.8759e-01)\n",
      "Epoch: [10][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 8.6444e-01 (8.9115e-01)\n",
      "Epoch: [10][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.7143e-01 (8.8926e-01)\n",
      "Epoch: [10][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 8.5691e-01 (8.8804e-01)\n",
      "Epoch: [10][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.5581e-01 (8.8852e-01)\n",
      "Epoch: [10][ 650/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.6350e-01 (8.8787e-01)\n",
      "Epoch: [10][ 700/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0110e+00 (8.8685e-01)\n",
      "Epoch: [10][ 750/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.9836e-01 (8.8813e-01)\n",
      "Epoch: [10][ 800/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.6396e-01 (8.8812e-01)\n",
      "Epoch: [10][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.4119e-01 (8.8898e-01)\n",
      "Epoch: [10][ 900/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.8615e-01 (8.9013e-01)\n",
      "Epoch: [10][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.7989e-01 (8.8950e-01)\n",
      "Epoch: [10][1000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.0510e+00 (8.8993e-01)\n",
      "Epoch: [10][1050/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.9379e-01 (8.9063e-01)\n",
      "Epoch: [10][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.6036e-01 (8.9070e-01)\n",
      "Epoch: [10][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0771e+00 (8.9092e-01)\n",
      "Epoch: [10][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0204e+00 (8.9180e-01)\n",
      "Epoch: [10][1250/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.2648e-01 (8.9074e-01)\n",
      "Epoch: [10][1300/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.6997e-01 (8.9103e-01)\n",
      "Epoch: [10][1350/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.3421e-01 (8.9236e-01)\n",
      "Epoch: [10][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.6196e-01 (8.9255e-01)\n",
      "Epoch: [10][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.6324e-01 (8.9137e-01)\n",
      "Epoch: [10][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0446e+00 (8.9185e-01)\n",
      "Epoch: [10][1550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.2009e-01 (8.9213e-01)\n",
      "Epoch: [10][1600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.1159e-01 (8.9215e-01)\n",
      "Epoch: [10][1650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.1194e-01 (8.9184e-01)\n",
      "Epoch: [10][1700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.4746e-01 (8.9140e-01)\n",
      "Epoch: [10][1750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.6785e-01 (8.9218e-01)\n",
      "Epoch: [10][1800/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.4582e-01 (8.9198e-01)\n",
      "Epoch: [10][1850/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.0545e-01 (8.9163e-01)\n",
      "Epoch: [10][1900/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.4815e-01 (8.9233e-01)\n",
      "Epoch: [10][1950/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6622e-01 (8.9226e-01)\n",
      "Epoch: [10][2000/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6779e-01 (8.9274e-01)\n",
      "Epoch: [10][2050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5969e-01 (8.9325e-01)\n",
      "Epoch: [10][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0084e-01 (8.9340e-01)\n",
      "Epoch: [10][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5948e-01 (8.9375e-01)\n",
      "Epoch: [10][2200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7615e-01 (8.9411e-01)\n",
      "Epoch: [10][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4081e-01 (8.9480e-01)\n",
      "Epoch: [10][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7898e-01 (8.9547e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [10][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5394e-01 (8.9557e-01)\n",
      "Epoch: [10][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7244e-01 (8.9584e-01)\n",
      "Epoch: [10][2450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9894e-01 (8.9592e-01)\n",
      "Epoch: [10][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8286e-01 (8.9580e-01)\n",
      "Epoch: [10][2550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9123e-01 (8.9588e-01)\n",
      "Epoch: [10][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8129e-01 (8.9661e-01)\n",
      "Epoch: [10][2650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7716e-01 (8.9666e-01)\n",
      "Epoch: [10][2700/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0245e-01 (8.9696e-01)\n",
      "Epoch: [10][2750/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5191e-01 (8.9683e-01)\n",
      "Epoch: [10][2800/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0556e+00 (8.9739e-01)\n",
      "Epoch: [10][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8540e-01 (8.9766e-01)\n",
      "Epoch: [10][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2910e-01 (8.9784e-01)\n",
      "Epoch: [10][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5971e-01 (8.9793e-01)\n",
      "Epoch: [10][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8810e-01 (8.9816e-01)\n",
      "Epoch: [10][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7318e-01 (8.9821e-01)\n",
      "Epoch: [10][3100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2796e-01 (8.9824e-01)\n",
      "Epoch: [10][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8590e-01 (8.9803e-01)\n",
      "Epoch: [10][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7288e-01 (8.9775e-01)\n",
      "Epoch: [10][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0536e-01 (8.9765e-01)\n",
      "Epoch: [10][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3756e-01 (8.9758e-01)\n",
      "Epoch: [10][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7534e-01 (8.9807e-01)\n",
      "Epoch: [10][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7896e-01 (8.9791e-01)\n",
      "Epoch: [10][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0721e+00 (8.9813e-01)\n",
      "Epoch: [10][3500/5005]\tTime  0.563 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4532e-01 (8.9796e-01)\n",
      "Epoch: [10][3550/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9732e-01 (8.9821e-01)\n",
      "Epoch: [10][3600/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5511e-01 (8.9829e-01)\n",
      "Epoch: [10][3650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4690e-01 (8.9836e-01)\n",
      "Epoch: [10][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3017e-01 (8.9845e-01)\n",
      "Epoch: [10][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0569e-01 (8.9851e-01)\n",
      "Epoch: [10][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8382e-01 (8.9881e-01)\n",
      "Epoch: [10][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0530e+00 (8.9922e-01)\n",
      "Epoch: [10][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5333e-01 (8.9983e-01)\n",
      "Epoch: [10][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9227e-01 (8.9986e-01)\n",
      "Epoch: [10][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4851e-01 (9.0000e-01)\n",
      "Epoch: [10][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8453e-01 (9.0022e-01)\n",
      "Epoch: [10][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1577e-01 (9.0054e-01)\n",
      "Epoch: [10][4150/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5999e-01 (9.0089e-01)\n",
      "Epoch: [10][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7333e-01 (9.0096e-01)\n",
      "Epoch: [10][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6748e-01 (9.0089e-01)\n",
      "Epoch: [10][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9215e-01 (9.0090e-01)\n",
      "Epoch: [10][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1851e-01 (9.0086e-01)\n",
      "Epoch: [10][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8435e-01 (9.0121e-01)\n",
      "Epoch: [10][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0595e+00 (9.0134e-01)\n",
      "Epoch: [10][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8921e-01 (9.0147e-01)\n",
      "Epoch: [10][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0999e-01 (9.0179e-01)\n",
      "Epoch: [10][4600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0090e+00 (9.0190e-01)\n",
      "Epoch: [10][4650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8277e-01 (9.0186e-01)\n",
      "Epoch: [10][4700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8814e-01 (9.0207e-01)\n",
      "Epoch: [10][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0165e-01 (9.0213e-01)\n",
      "Epoch: [10][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8570e-01 (9.0212e-01)\n",
      "Epoch: [10][4850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4674e-01 (9.0234e-01)\n",
      "Epoch: [10][4900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0805e+00 (9.0239e-01)\n",
      "Epoch: [10][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0156e+00 (9.0252e-01)\n",
      "Epoch: [10][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0591e+00 (9.0254e-01)\n",
      "Test: [  0/196]\tTime  3.393 ( 3.393)\tLoss 5.5451e-01 (5.5451e-01)\tAcc@1  86.72 ( 86.72)\tAcc@5  97.66 ( 97.66)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.437)\tLoss 6.4132e-01 (7.7468e-01)\tAcc@1  86.72 ( 79.56)\tAcc@5  95.70 ( 94.98)\n",
      "Test: [100/196]\tTime  0.378 ( 0.407)\tLoss 1.4848e+00 (9.1259e-01)\tAcc@1  58.20 ( 76.35)\tAcc@5  87.11 ( 93.49)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.1212e+00 (1.0321e+00)\tAcc@1  74.61 ( 73.94)\tAcc@5  90.23 ( 91.87)\n",
      "epoch 10 0.9026297084326241 72.9679946899414 0.004500000000000001 4698510 0.20040132214688156\n",
      "Epoch: [11][   0/5005]\tTime  3.053 ( 3.053)\tData  2.490 ( 2.490)\tLoss 8.0333e-01 (8.0333e-01)\n",
      "Epoch: [11][  50/5005]\tTime  0.559 ( 0.608)\tData  0.000 ( 0.049)\tLoss 9.2644e-01 (8.7519e-01)\n",
      "Epoch: [11][ 100/5005]\tTime  0.558 ( 0.584)\tData  0.000 ( 0.025)\tLoss 9.0808e-01 (8.7354e-01)\n",
      "Epoch: [11][ 150/5005]\tTime  0.558 ( 0.575)\tData  0.000 ( 0.017)\tLoss 8.3117e-01 (8.7144e-01)\n",
      "Epoch: [11][ 200/5005]\tTime  0.558 ( 0.571)\tData  0.000 ( 0.013)\tLoss 8.9124e-01 (8.6996e-01)\n",
      "Epoch: [11][ 250/5005]\tTime  0.558 ( 0.569)\tData  0.000 ( 0.010)\tLoss 1.0248e+00 (8.7306e-01)\n",
      "Epoch: [11][ 300/5005]\tTime  0.558 ( 0.567)\tData  0.000 ( 0.008)\tLoss 8.7899e-01 (8.7318e-01)\n",
      "Epoch: [11][ 350/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.007)\tLoss 8.9154e-01 (8.7229e-01)\n",
      "Epoch: [11][ 400/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.006)\tLoss 8.5628e-01 (8.7360e-01)\n",
      "Epoch: [11][ 450/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.006)\tLoss 9.1054e-01 (8.7322e-01)\n",
      "Epoch: [11][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 8.4792e-01 (8.7282e-01)\n",
      "Epoch: [11][ 550/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.005)\tLoss 9.2299e-01 (8.7317e-01)\n",
      "Epoch: [11][ 600/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.4674e-01 (8.7273e-01)\n",
      "Epoch: [11][ 650/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 7.5355e-01 (8.7215e-01)\n",
      "Epoch: [11][ 700/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 8.3189e-01 (8.7284e-01)\n",
      "Epoch: [11][ 750/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0536e+00 (8.7305e-01)\n",
      "Epoch: [11][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.8073e-01 (8.7311e-01)\n",
      "Epoch: [11][ 850/5005]\tTime  0.557 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.0154e+00 (8.7492e-01)\n",
      "Epoch: [11][ 900/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.1842e-01 (8.7571e-01)\n",
      "Epoch: [11][ 950/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.6680e-01 (8.7463e-01)\n",
      "Epoch: [11][1000/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.2030e-01 (8.7476e-01)\n",
      "Epoch: [11][1050/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.9138e-01 (8.7464e-01)\n",
      "Epoch: [11][1100/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.3096e-01 (8.7394e-01)\n",
      "Epoch: [11][1150/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.3924e-01 (8.7369e-01)\n",
      "Epoch: [11][1200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.8007e-01 (8.7275e-01)\n",
      "Epoch: [11][1250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.1190e-01 (8.7142e-01)\n",
      "Epoch: [11][1300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0400e+00 (8.7226e-01)\n",
      "Epoch: [11][1350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.5025e-01 (8.7342e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [11][1400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.1933e-01 (8.7380e-01)\n",
      "Epoch: [11][1450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.6606e-01 (8.7429e-01)\n",
      "Epoch: [11][1500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.8474e-01 (8.7445e-01)\n",
      "Epoch: [11][1550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.0149e-01 (8.7482e-01)\n",
      "Epoch: [11][1600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.1726e-01 (8.7488e-01)\n",
      "Epoch: [11][1650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.4511e-01 (8.7513e-01)\n",
      "Epoch: [11][1700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.0583e-01 (8.7511e-01)\n",
      "Epoch: [11][1750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.6052e-01 (8.7479e-01)\n",
      "Epoch: [11][1800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.5248e-01 (8.7455e-01)\n",
      "Epoch: [11][1850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.2905e-01 (8.7504e-01)\n",
      "Epoch: [11][1900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5857e-01 (8.7510e-01)\n",
      "Epoch: [11][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6705e-01 (8.7565e-01)\n",
      "Epoch: [11][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9353e-01 (8.7583e-01)\n",
      "Epoch: [11][2050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4321e-01 (8.7641e-01)\n",
      "Epoch: [11][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3462e-01 (8.7671e-01)\n",
      "Epoch: [11][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3469e-01 (8.7667e-01)\n",
      "Epoch: [11][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4625e-01 (8.7656e-01)\n",
      "Epoch: [11][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2267e-01 (8.7664e-01)\n",
      "Epoch: [11][2300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0648e-01 (8.7657e-01)\n",
      "Epoch: [11][2350/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.6611e-01 (8.7710e-01)\n",
      "Epoch: [11][2400/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.5746e-01 (8.7755e-01)\n",
      "Epoch: [11][2450/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.6752e-01 (8.7785e-01)\n",
      "Epoch: [11][2500/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.8545e-01 (8.7812e-01)\n",
      "Epoch: [11][2550/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.7368e-01 (8.7775e-01)\n",
      "Epoch: [11][2600/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.2018e-01 (8.7825e-01)\n",
      "Epoch: [11][2650/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.3232e-01 (8.7788e-01)\n",
      "Epoch: [11][2700/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.8495e-01 (8.7796e-01)\n",
      "Epoch: [11][2750/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.6860e-01 (8.7837e-01)\n",
      "Epoch: [11][2800/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.2282e-01 (8.7852e-01)\n",
      "Epoch: [11][2850/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.8449e-01 (8.7890e-01)\n",
      "Epoch: [11][2900/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.4205e-01 (8.7942e-01)\n",
      "Epoch: [11][2950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.9915e-01 (8.7936e-01)\n",
      "Epoch: [11][3000/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.1658e-01 (8.7929e-01)\n",
      "Epoch: [11][3050/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.4169e-01 (8.7904e-01)\n",
      "Epoch: [11][3100/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.4995e-01 (8.7886e-01)\n",
      "Epoch: [11][3150/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.2694e-01 (8.7925e-01)\n",
      "Epoch: [11][3200/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0557e+00 (8.7908e-01)\n",
      "Epoch: [11][3250/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.6418e-01 (8.7933e-01)\n",
      "Epoch: [11][3300/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0621e+00 (8.7954e-01)\n",
      "Epoch: [11][3350/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.2377e-01 (8.7956e-01)\n",
      "Epoch: [11][3400/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.4048e-01 (8.8018e-01)\n",
      "Epoch: [11][3450/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.6140e-01 (8.7984e-01)\n",
      "Epoch: [11][3500/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 6.4428e-01 (8.7996e-01)\n",
      "Epoch: [11][3550/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.9265e-01 (8.7992e-01)\n",
      "Epoch: [11][3600/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.6048e-01 (8.8044e-01)\n",
      "Epoch: [11][3650/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.9297e-01 (8.8045e-01)\n",
      "Epoch: [11][3700/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.4303e-01 (8.8047e-01)\n",
      "Epoch: [11][3750/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.5609e-01 (8.8043e-01)\n",
      "Epoch: [11][3800/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.4203e-01 (8.8072e-01)\n",
      "Epoch: [11][3850/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.7597e-01 (8.8090e-01)\n",
      "Epoch: [11][3900/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.4326e-01 (8.8112e-01)\n",
      "Epoch: [11][3950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.2629e-01 (8.8113e-01)\n",
      "Epoch: [11][4000/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.4980e-01 (8.8143e-01)\n",
      "Epoch: [11][4050/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0130e+00 (8.8130e-01)\n",
      "Epoch: [11][4100/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0259e+00 (8.8165e-01)\n",
      "Epoch: [11][4150/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1338e+00 (8.8206e-01)\n",
      "Epoch: [11][4200/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.2882e-01 (8.8209e-01)\n",
      "Epoch: [11][4250/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.9795e-01 (8.8227e-01)\n",
      "Epoch: [11][4300/5005]\tTime  0.560 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.3929e-01 (8.8244e-01)\n",
      "Epoch: [11][4350/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.3711e-01 (8.8268e-01)\n",
      "Epoch: [11][4400/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.9832e-01 (8.8295e-01)\n",
      "Epoch: [11][4450/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.7727e-01 (8.8310e-01)\n",
      "Epoch: [11][4500/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1158e+00 (8.8361e-01)\n",
      "Epoch: [11][4550/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.3360e-01 (8.8357e-01)\n",
      "Epoch: [11][4600/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.5135e-01 (8.8370e-01)\n",
      "Epoch: [11][4650/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.9584e-01 (8.8358e-01)\n",
      "Epoch: [11][4700/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.1361e-01 (8.8366e-01)\n",
      "Epoch: [11][4750/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0504e+00 (8.8369e-01)\n",
      "Epoch: [11][4800/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.8596e-01 (8.8377e-01)\n",
      "Epoch: [11][4850/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.9193e-01 (8.8420e-01)\n",
      "Epoch: [11][4900/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.7560e-01 (8.8412e-01)\n",
      "Epoch: [11][4950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.2480e-01 (8.8441e-01)\n",
      "Epoch: [11][5000/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.6257e-01 (8.8460e-01)\n",
      "Test: [  0/196]\tTime  3.441 ( 3.441)\tLoss 5.8385e-01 (5.8385e-01)\tAcc@1  83.98 ( 83.98)\tAcc@5  95.70 ( 95.70)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.437)\tLoss 4.5198e-01 (7.6501e-01)\tAcc@1  90.23 ( 79.73)\tAcc@5  97.27 ( 95.09)\n",
      "Test: [100/196]\tTime  0.377 ( 0.408)\tLoss 1.5009e+00 (8.9499e-01)\tAcc@1  60.55 ( 76.71)\tAcc@5  85.55 ( 93.68)\n",
      "Test: [150/196]\tTime  0.378 ( 0.398)\tLoss 1.1873e+00 (1.0088e+00)\tAcc@1  75.39 ( 74.41)\tAcc@5  89.45 ( 92.24)\n",
      "epoch 11 0.8846950445814429 73.44999694824219 0.004000000000000002 4698510 0.20040132214688156\n",
      "Epoch: [12][   0/5005]\tTime  3.313 ( 3.313)\tData  2.752 ( 2.752)\tLoss 6.7073e-01 (6.7073e-01)\n",
      "Epoch: [12][  50/5005]\tTime  0.559 ( 0.613)\tData  0.000 ( 0.054)\tLoss 7.0973e-01 (8.5199e-01)\n",
      "Epoch: [12][ 100/5005]\tTime  0.559 ( 0.586)\tData  0.000 ( 0.027)\tLoss 7.6185e-01 (8.5722e-01)\n",
      "Epoch: [12][ 150/5005]\tTime  0.559 ( 0.577)\tData  0.000 ( 0.018)\tLoss 8.6801e-01 (8.5503e-01)\n",
      "Epoch: [12][ 200/5005]\tTime  0.559 ( 0.572)\tData  0.000 ( 0.014)\tLoss 8.6604e-01 (8.5496e-01)\n",
      "Epoch: [12][ 250/5005]\tTime  0.558 ( 0.570)\tData  0.000 ( 0.011)\tLoss 7.7962e-01 (8.5042e-01)\n",
      "Epoch: [12][ 300/5005]\tTime  0.559 ( 0.568)\tData  0.000 ( 0.009)\tLoss 8.1106e-01 (8.4942e-01)\n",
      "Epoch: [12][ 350/5005]\tTime  0.558 ( 0.567)\tData  0.000 ( 0.008)\tLoss 7.1419e-01 (8.4932e-01)\n",
      "Epoch: [12][ 400/5005]\tTime  0.560 ( 0.566)\tData  0.000 ( 0.007)\tLoss 7.7956e-01 (8.5295e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [12][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 8.4366e-01 (8.5460e-01)\n",
      "Epoch: [12][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.006)\tLoss 8.7721e-01 (8.5414e-01)\n",
      "Epoch: [12][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.1172e-01 (8.5495e-01)\n",
      "Epoch: [12][ 600/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.005)\tLoss 8.3046e-01 (8.5578e-01)\n",
      "Epoch: [12][ 650/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 7.7503e-01 (8.5552e-01)\n",
      "Epoch: [12][ 700/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 7.8837e-01 (8.5558e-01)\n",
      "Epoch: [12][ 750/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 8.4215e-01 (8.5472e-01)\n",
      "Epoch: [12][ 800/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 7.5991e-01 (8.5515e-01)\n",
      "Epoch: [12][ 850/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0375e+00 (8.5493e-01)\n",
      "Epoch: [12][ 900/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.5979e-01 (8.5418e-01)\n",
      "Epoch: [12][ 950/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.6564e-01 (8.5563e-01)\n",
      "Epoch: [12][1000/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.0586e-01 (8.5580e-01)\n",
      "Epoch: [12][1050/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.0228e+00 (8.5621e-01)\n",
      "Epoch: [12][1100/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.8231e-01 (8.5684e-01)\n",
      "Epoch: [12][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.3462e-01 (8.5678e-01)\n",
      "Epoch: [12][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.8054e-01 (8.5755e-01)\n",
      "Epoch: [12][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.9477e-01 (8.5627e-01)\n",
      "Epoch: [12][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.8601e-01 (8.5651e-01)\n",
      "Epoch: [12][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.0853e-01 (8.5701e-01)\n",
      "Epoch: [12][1400/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.3241e-01 (8.5747e-01)\n",
      "Epoch: [12][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.0243e-01 (8.5721e-01)\n",
      "Epoch: [12][1500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.2718e-01 (8.5769e-01)\n",
      "Epoch: [12][1550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.5388e-01 (8.5836e-01)\n",
      "Epoch: [12][1600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.7362e-01 (8.5838e-01)\n",
      "Epoch: [12][1650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.4865e-01 (8.5773e-01)\n",
      "Epoch: [12][1700/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.4841e-01 (8.5786e-01)\n",
      "Epoch: [12][1750/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.9112e-01 (8.5747e-01)\n",
      "Epoch: [12][1800/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6314e-01 (8.5786e-01)\n",
      "Epoch: [12][1850/5005]\tTime  0.561 ( 0.560)\tData  0.001 ( 0.002)\tLoss 9.2877e-01 (8.5882e-01)\n",
      "Epoch: [12][1900/5005]\tTime  0.561 ( 0.560)\tData  0.001 ( 0.002)\tLoss 8.7934e-01 (8.5895e-01)\n",
      "Epoch: [12][1950/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.6207e-01 (8.5894e-01)\n",
      "Epoch: [12][2000/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.6420e-01 (8.5919e-01)\n",
      "Epoch: [12][2050/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.2748e-01 (8.5926e-01)\n",
      "Epoch: [12][2100/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6878e-01 (8.5906e-01)\n",
      "Epoch: [12][2150/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5479e-01 (8.5940e-01)\n",
      "Epoch: [12][2200/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7806e-01 (8.5952e-01)\n",
      "Epoch: [12][2250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4541e-01 (8.5952e-01)\n",
      "Epoch: [12][2300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1429e-01 (8.5967e-01)\n",
      "Epoch: [12][2350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9921e-01 (8.5960e-01)\n",
      "Epoch: [12][2400/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6370e-01 (8.5984e-01)\n",
      "Epoch: [12][2450/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7547e-01 (8.5987e-01)\n",
      "Epoch: [12][2500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.8121e-01 (8.6005e-01)\n",
      "Epoch: [12][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9291e-01 (8.6029e-01)\n",
      "Epoch: [12][2600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9596e-01 (8.6046e-01)\n",
      "Epoch: [12][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1955e-01 (8.6084e-01)\n",
      "Epoch: [12][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6036e-01 (8.6092e-01)\n",
      "Epoch: [12][2750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2876e-01 (8.6109e-01)\n",
      "Epoch: [12][2800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8401e-01 (8.6118e-01)\n",
      "Epoch: [12][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5293e-01 (8.6112e-01)\n",
      "Epoch: [12][2900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6029e-01 (8.6157e-01)\n",
      "Epoch: [12][2950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5179e-01 (8.6202e-01)\n",
      "Epoch: [12][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4640e-01 (8.6214e-01)\n",
      "Epoch: [12][3050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1682e-01 (8.6264e-01)\n",
      "Epoch: [12][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3296e-01 (8.6295e-01)\n",
      "Epoch: [12][3150/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1646e-01 (8.6303e-01)\n",
      "Epoch: [12][3200/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2677e-01 (8.6309e-01)\n",
      "Epoch: [12][3250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1898e-01 (8.6307e-01)\n",
      "Epoch: [12][3300/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0892e-01 (8.6342e-01)\n",
      "Epoch: [12][3350/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7927e-01 (8.6338e-01)\n",
      "Epoch: [12][3400/5005]\tTime  0.561 ( 0.560)\tData  0.001 ( 0.001)\tLoss 9.0319e-01 (8.6333e-01)\n",
      "Epoch: [12][3450/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1759e-01 (8.6351e-01)\n",
      "Epoch: [12][3500/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6170e-01 (8.6353e-01)\n",
      "Epoch: [12][3550/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4964e-01 (8.6373e-01)\n",
      "Epoch: [12][3600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1247e-01 (8.6401e-01)\n",
      "Epoch: [12][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8359e-01 (8.6401e-01)\n",
      "Epoch: [12][3700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7255e-01 (8.6433e-01)\n",
      "Epoch: [12][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6721e-01 (8.6433e-01)\n",
      "Epoch: [12][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0614e+00 (8.6452e-01)\n",
      "Epoch: [12][3850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8491e-01 (8.6433e-01)\n",
      "Epoch: [12][3900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7773e-01 (8.6446e-01)\n",
      "Epoch: [12][3950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9644e-01 (8.6460e-01)\n",
      "Epoch: [12][4000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9080e-01 (8.6500e-01)\n",
      "Epoch: [12][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1269e-01 (8.6525e-01)\n",
      "Epoch: [12][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0329e+00 (8.6544e-01)\n",
      "Epoch: [12][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6581e-01 (8.6548e-01)\n",
      "Epoch: [12][4200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4476e-01 (8.6574e-01)\n",
      "Epoch: [12][4250/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1844e-01 (8.6586e-01)\n",
      "Epoch: [12][4300/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0681e+00 (8.6592e-01)\n",
      "Epoch: [12][4350/5005]\tTime  0.561 ( 0.560)\tData  0.001 ( 0.001)\tLoss 9.3926e-01 (8.6612e-01)\n",
      "Epoch: [12][4400/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3166e-01 (8.6615e-01)\n",
      "Epoch: [12][4450/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.5902e-01 (8.6599e-01)\n",
      "Epoch: [12][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6567e-01 (8.6586e-01)\n",
      "Epoch: [12][4550/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4524e-01 (8.6579e-01)\n",
      "Epoch: [12][4600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5106e-01 (8.6568e-01)\n",
      "Epoch: [12][4650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8919e-01 (8.6589e-01)\n",
      "Epoch: [12][4700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0553e-01 (8.6574e-01)\n",
      "Epoch: [12][4750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0365e+00 (8.6577e-01)\n",
      "Epoch: [12][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4371e-01 (8.6594e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [12][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4728e-01 (8.6608e-01)\n",
      "Epoch: [12][4900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5365e-01 (8.6624e-01)\n",
      "Epoch: [12][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7661e-01 (8.6661e-01)\n",
      "Epoch: [12][5000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0463e+00 (8.6693e-01)\n",
      "Test: [  0/196]\tTime  3.311 ( 3.311)\tLoss 5.7075e-01 (5.7075e-01)\tAcc@1  84.38 ( 84.38)\tAcc@5  96.48 ( 96.48)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 4.6218e-01 (7.5216e-01)\tAcc@1  88.67 ( 79.98)\tAcc@5  96.48 ( 95.11)\n",
      "Test: [100/196]\tTime  0.378 ( 0.406)\tLoss 1.3548e+00 (8.8647e-01)\tAcc@1  65.23 ( 76.88)\tAcc@5  88.28 ( 93.78)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 1.1043e+00 (1.0008e+00)\tAcc@1  76.56 ( 74.66)\tAcc@5  88.67 ( 92.27)\n",
      "epoch 12 0.8669103983416425 73.64799499511719 0.003500000000000001 4698510 0.20040132214688156\n",
      "Epoch: [13][   0/5005]\tTime  2.779 ( 2.779)\tData  2.217 ( 2.217)\tLoss 8.6160e-01 (8.6160e-01)\n",
      "Epoch: [13][  50/5005]\tTime  0.559 ( 0.602)\tData  0.000 ( 0.044)\tLoss 8.0185e-01 (8.5542e-01)\n",
      "Epoch: [13][ 100/5005]\tTime  0.559 ( 0.580)\tData  0.000 ( 0.022)\tLoss 8.2378e-01 (8.3819e-01)\n",
      "Epoch: [13][ 150/5005]\tTime  0.561 ( 0.573)\tData  0.000 ( 0.015)\tLoss 7.2886e-01 (8.3243e-01)\n",
      "Epoch: [13][ 200/5005]\tTime  0.561 ( 0.570)\tData  0.000 ( 0.011)\tLoss 8.2551e-01 (8.3575e-01)\n",
      "Epoch: [13][ 250/5005]\tTime  0.561 ( 0.568)\tData  0.000 ( 0.009)\tLoss 7.8407e-01 (8.3563e-01)\n",
      "Epoch: [13][ 300/5005]\tTime  0.561 ( 0.567)\tData  0.000 ( 0.008)\tLoss 8.0288e-01 (8.3707e-01)\n",
      "Epoch: [13][ 350/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.007)\tLoss 7.4193e-01 (8.3900e-01)\n",
      "Epoch: [13][ 400/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.006)\tLoss 8.5639e-01 (8.3861e-01)\n",
      "Epoch: [13][ 450/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 8.6534e-01 (8.3891e-01)\n",
      "Epoch: [13][ 500/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.5740e-01 (8.3948e-01)\n",
      "Epoch: [13][ 550/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.8899e-01 (8.3750e-01)\n",
      "Epoch: [13][ 600/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 7.6864e-01 (8.3688e-01)\n",
      "Epoch: [13][ 650/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 7.8454e-01 (8.3580e-01)\n",
      "Epoch: [13][ 700/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.2985e-01 (8.3516e-01)\n",
      "Epoch: [13][ 750/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 6.8325e-01 (8.3555e-01)\n",
      "Epoch: [13][ 800/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.0977e-01 (8.3507e-01)\n",
      "Epoch: [13][ 850/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0110e+00 (8.3517e-01)\n",
      "Epoch: [13][ 900/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 7.2237e-01 (8.3612e-01)\n",
      "Epoch: [13][ 950/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.8087e-01 (8.3585e-01)\n",
      "Epoch: [13][1000/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.9657e-01 (8.3563e-01)\n",
      "Epoch: [13][1050/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.4524e-01 (8.3587e-01)\n",
      "Epoch: [13][1100/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0320e+00 (8.3668e-01)\n",
      "Epoch: [13][1150/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.0961e-01 (8.3629e-01)\n",
      "Epoch: [13][1200/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.8325e-01 (8.3657e-01)\n",
      "Epoch: [13][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.2266e-01 (8.3639e-01)\n",
      "Epoch: [13][1300/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.4614e-01 (8.3646e-01)\n",
      "Epoch: [13][1350/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.0258e-01 (8.3741e-01)\n",
      "Epoch: [13][1400/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.1461e-01 (8.3675e-01)\n",
      "Epoch: [13][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.6072e-01 (8.3648e-01)\n",
      "Epoch: [13][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.6316e-01 (8.3695e-01)\n",
      "Epoch: [13][1550/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.0969e-01 (8.3726e-01)\n",
      "Epoch: [13][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8191e-01 (8.3833e-01)\n",
      "Epoch: [13][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.0236e-01 (8.3984e-01)\n",
      "Epoch: [13][1700/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.9547e-01 (8.4052e-01)\n",
      "Epoch: [13][1750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9711e-01 (8.4096e-01)\n",
      "Epoch: [13][1800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1509e-01 (8.4062e-01)\n",
      "Epoch: [13][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4519e-01 (8.4102e-01)\n",
      "Epoch: [13][1900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7280e-01 (8.4094e-01)\n",
      "Epoch: [13][1950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3183e-01 (8.4089e-01)\n",
      "Epoch: [13][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4660e-01 (8.4107e-01)\n",
      "Epoch: [13][2050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8125e-01 (8.4148e-01)\n",
      "Epoch: [13][2100/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6211e-01 (8.4168e-01)\n",
      "Epoch: [13][2150/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1843e-01 (8.4126e-01)\n",
      "Epoch: [13][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3847e-01 (8.4165e-01)\n",
      "Epoch: [13][2250/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.7265e-01 (8.4186e-01)\n",
      "Epoch: [13][2300/5005]\tTime  0.561 ( 0.560)\tData  0.001 ( 0.001)\tLoss 8.0640e-01 (8.4183e-01)\n",
      "Epoch: [13][2350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0393e-01 (8.4235e-01)\n",
      "Epoch: [13][2400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0034e-01 (8.4277e-01)\n",
      "Epoch: [13][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1855e-01 (8.4272e-01)\n",
      "Epoch: [13][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3739e-01 (8.4284e-01)\n",
      "Epoch: [13][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4291e-01 (8.4309e-01)\n",
      "Epoch: [13][2600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0358e+00 (8.4349e-01)\n",
      "Epoch: [13][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9919e-01 (8.4327e-01)\n",
      "Epoch: [13][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3323e-01 (8.4331e-01)\n",
      "Epoch: [13][2750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0855e-01 (8.4307e-01)\n",
      "Epoch: [13][2800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6203e-01 (8.4337e-01)\n",
      "Epoch: [13][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0826e-01 (8.4331e-01)\n",
      "Epoch: [13][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8887e-01 (8.4375e-01)\n",
      "Epoch: [13][2950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7285e-01 (8.4405e-01)\n",
      "Epoch: [13][3000/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4696e-01 (8.4383e-01)\n",
      "Epoch: [13][3050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2527e-01 (8.4398e-01)\n",
      "Epoch: [13][3100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8094e-01 (8.4435e-01)\n",
      "Epoch: [13][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7795e-01 (8.4449e-01)\n",
      "Epoch: [13][3200/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6278e-01 (8.4474e-01)\n",
      "Epoch: [13][3250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8154e-01 (8.4428e-01)\n",
      "Epoch: [13][3300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6007e-01 (8.4472e-01)\n",
      "Epoch: [13][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9582e-01 (8.4475e-01)\n",
      "Epoch: [13][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1029e-01 (8.4513e-01)\n",
      "Epoch: [13][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4706e-01 (8.4521e-01)\n",
      "Epoch: [13][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0944e-01 (8.4508e-01)\n",
      "Epoch: [13][3550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9895e-01 (8.4533e-01)\n",
      "Epoch: [13][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9042e-01 (8.4532e-01)\n",
      "Epoch: [13][3650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1121e-01 (8.4553e-01)\n",
      "Epoch: [13][3700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9891e-01 (8.4590e-01)\n",
      "Epoch: [13][3750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1210e+00 (8.4625e-01)\n",
      "Epoch: [13][3800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6104e-01 (8.4638e-01)\n",
      "Epoch: [13][3850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6969e-01 (8.4658e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [13][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4763e-01 (8.4668e-01)\n",
      "Epoch: [13][3950/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6633e-01 (8.4652e-01)\n",
      "Epoch: [13][4000/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0685e-01 (8.4662e-01)\n",
      "Epoch: [13][4050/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4958e-01 (8.4680e-01)\n",
      "Epoch: [13][4100/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8019e-01 (8.4705e-01)\n",
      "Epoch: [13][4150/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7521e-01 (8.4699e-01)\n",
      "Epoch: [13][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0115e-01 (8.4710e-01)\n",
      "Epoch: [13][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0846e+00 (8.4744e-01)\n",
      "Epoch: [13][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8320e-01 (8.4721e-01)\n",
      "Epoch: [13][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4898e-01 (8.4722e-01)\n",
      "Epoch: [13][4400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1278e-01 (8.4749e-01)\n",
      "Epoch: [13][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7750e-01 (8.4741e-01)\n",
      "Epoch: [13][4500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7135e-01 (8.4768e-01)\n",
      "Epoch: [13][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8711e-01 (8.4770e-01)\n",
      "Epoch: [13][4600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9032e-01 (8.4781e-01)\n",
      "Epoch: [13][4650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7934e-01 (8.4796e-01)\n",
      "Epoch: [13][4700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1432e-01 (8.4804e-01)\n",
      "Epoch: [13][4750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7480e-01 (8.4804e-01)\n",
      "Epoch: [13][4800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7366e-01 (8.4812e-01)\n",
      "Epoch: [13][4850/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9500e-01 (8.4824e-01)\n",
      "Epoch: [13][4900/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0107e-01 (8.4827e-01)\n",
      "Epoch: [13][4950/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8114e-01 (8.4841e-01)\n",
      "Epoch: [13][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0484e+00 (8.4851e-01)\n",
      "Test: [  0/196]\tTime  3.405 ( 3.405)\tLoss 5.9277e-01 (5.9277e-01)\tAcc@1  83.20 ( 83.20)\tAcc@5  96.09 ( 96.09)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.437)\tLoss 5.2981e-01 (7.5386e-01)\tAcc@1  86.33 ( 79.79)\tAcc@5  95.70 ( 95.18)\n",
      "Test: [100/196]\tTime  0.377 ( 0.407)\tLoss 1.3685e+00 (8.8069e-01)\tAcc@1  64.45 ( 77.07)\tAcc@5  88.28 ( 93.90)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 1.1356e+00 (9.9708e-01)\tAcc@1  76.17 ( 74.82)\tAcc@5  89.45 ( 92.37)\n",
      "epoch 13 0.8484625418165511 73.88999938964844 0.0030000000000000014 4698510 0.20040132214688156\n",
      "Epoch: [14][   0/5005]\tTime  3.331 ( 3.331)\tData  2.767 ( 2.767)\tLoss 7.0442e-01 (7.0442e-01)\n",
      "Epoch: [14][  50/5005]\tTime  0.558 ( 0.613)\tData  0.000 ( 0.054)\tLoss 8.3945e-01 (7.8962e-01)\n",
      "Epoch: [14][ 100/5005]\tTime  0.559 ( 0.586)\tData  0.000 ( 0.028)\tLoss 7.8346e-01 (7.8993e-01)\n",
      "Epoch: [14][ 150/5005]\tTime  0.558 ( 0.577)\tData  0.000 ( 0.019)\tLoss 7.8964e-01 (8.0275e-01)\n",
      "Epoch: [14][ 200/5005]\tTime  0.559 ( 0.572)\tData  0.000 ( 0.014)\tLoss 8.3975e-01 (8.0196e-01)\n",
      "Epoch: [14][ 250/5005]\tTime  0.559 ( 0.570)\tData  0.000 ( 0.011)\tLoss 8.5200e-01 (8.0079e-01)\n",
      "Epoch: [14][ 300/5005]\tTime  0.559 ( 0.568)\tData  0.000 ( 0.009)\tLoss 1.0978e+00 (8.0413e-01)\n",
      "Epoch: [14][ 350/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.008)\tLoss 7.3024e-01 (8.0728e-01)\n",
      "Epoch: [14][ 400/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.007)\tLoss 8.8840e-01 (8.0754e-01)\n",
      "Epoch: [14][ 450/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.006)\tLoss 7.4628e-01 (8.0702e-01)\n",
      "Epoch: [14][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.006)\tLoss 8.3825e-01 (8.0632e-01)\n",
      "Epoch: [14][ 550/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0018e+00 (8.0395e-01)\n",
      "Epoch: [14][ 600/5005]\tTime  0.561 ( 0.563)\tData  0.000 ( 0.005)\tLoss 8.2689e-01 (8.0412e-01)\n",
      "Epoch: [14][ 650/5005]\tTime  0.560 ( 0.563)\tData  0.001 ( 0.004)\tLoss 9.4319e-01 (8.0516e-01)\n",
      "Epoch: [14][ 700/5005]\tTime  0.561 ( 0.563)\tData  0.000 ( 0.004)\tLoss 7.0776e-01 (8.0634e-01)\n",
      "Epoch: [14][ 750/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.3840e-01 (8.0750e-01)\n",
      "Epoch: [14][ 800/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.3671e-01 (8.0673e-01)\n",
      "Epoch: [14][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.6926e-01 (8.0762e-01)\n",
      "Epoch: [14][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 6.6303e-01 (8.0771e-01)\n",
      "Epoch: [14][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.2863e-01 (8.0884e-01)\n",
      "Epoch: [14][1000/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.0578e-01 (8.0950e-01)\n",
      "Epoch: [14][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.4630e-01 (8.1051e-01)\n",
      "Epoch: [14][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.0507e-01 (8.1146e-01)\n",
      "Epoch: [14][1150/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 7.3096e-01 (8.1099e-01)\n",
      "Epoch: [14][1200/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.7340e-01 (8.1243e-01)\n",
      "Epoch: [14][1250/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.6851e-01 (8.1244e-01)\n",
      "Epoch: [14][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.3674e-01 (8.1309e-01)\n",
      "Epoch: [14][1350/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.8193e-01 (8.1303e-01)\n",
      "Epoch: [14][1400/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.6573e-01 (8.1331e-01)\n",
      "Epoch: [14][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1323e-01 (8.1379e-01)\n",
      "Epoch: [14][1500/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.4663e-01 (8.1431e-01)\n",
      "Epoch: [14][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.7904e-01 (8.1461e-01)\n",
      "Epoch: [14][1600/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.2711e-01 (8.1488e-01)\n",
      "Epoch: [14][1650/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.4354e-01 (8.1518e-01)\n",
      "Epoch: [14][1700/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.0621e-01 (8.1511e-01)\n",
      "Epoch: [14][1750/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.0011e-01 (8.1562e-01)\n",
      "Epoch: [14][1800/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.6261e-01 (8.1614e-01)\n",
      "Epoch: [14][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.4335e-01 (8.1615e-01)\n",
      "Epoch: [14][1900/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.6359e-01 (8.1683e-01)\n",
      "Epoch: [14][1950/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.8988e-01 (8.1779e-01)\n",
      "Epoch: [14][2000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.3177e-01 (8.1814e-01)\n",
      "Epoch: [14][2050/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.7614e-01 (8.1783e-01)\n",
      "Epoch: [14][2100/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.8415e-01 (8.1768e-01)\n",
      "Epoch: [14][2150/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.3083e-01 (8.1712e-01)\n",
      "Epoch: [14][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2129e-01 (8.1743e-01)\n",
      "Epoch: [14][2250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0868e-01 (8.1772e-01)\n",
      "Epoch: [14][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3368e-01 (8.1769e-01)\n",
      "Epoch: [14][2350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2648e-01 (8.1801e-01)\n",
      "Epoch: [14][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.3785e-01 (8.1831e-01)\n",
      "Epoch: [14][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7311e-01 (8.1904e-01)\n",
      "Epoch: [14][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1297e-01 (8.1931e-01)\n",
      "Epoch: [14][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8479e-01 (8.1943e-01)\n",
      "Epoch: [14][2600/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4791e-01 (8.1940e-01)\n",
      "Epoch: [14][2650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9204e-01 (8.1978e-01)\n",
      "Epoch: [14][2700/5005]\tTime  0.561 ( 0.560)\tData  0.001 ( 0.001)\tLoss 7.0870e-01 (8.1983e-01)\n",
      "Epoch: [14][2750/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8696e-01 (8.2015e-01)\n",
      "Epoch: [14][2800/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0930e-01 (8.2071e-01)\n",
      "Epoch: [14][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5989e-01 (8.2106e-01)\n",
      "Epoch: [14][2900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5257e-01 (8.2136e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [14][2950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 5.7969e-01 (8.2155e-01)\n",
      "Epoch: [14][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0424e-01 (8.2180e-01)\n",
      "Epoch: [14][3050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4216e-01 (8.2178e-01)\n",
      "Epoch: [14][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7088e-01 (8.2192e-01)\n",
      "Epoch: [14][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7809e-01 (8.2176e-01)\n",
      "Epoch: [14][3200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7926e-01 (8.2225e-01)\n",
      "Epoch: [14][3250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4471e-01 (8.2250e-01)\n",
      "Epoch: [14][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3179e-01 (8.2272e-01)\n",
      "Epoch: [14][3350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8987e-01 (8.2255e-01)\n",
      "Epoch: [14][3400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.5693e-01 (8.2311e-01)\n",
      "Epoch: [14][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9753e-01 (8.2359e-01)\n",
      "Epoch: [14][3500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4674e-01 (8.2381e-01)\n",
      "Epoch: [14][3550/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8683e-01 (8.2399e-01)\n",
      "Epoch: [14][3600/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6244e-01 (8.2407e-01)\n",
      "Epoch: [14][3650/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0124e-01 (8.2451e-01)\n",
      "Epoch: [14][3700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9640e-01 (8.2456e-01)\n",
      "Epoch: [14][3750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.6617e-01 (8.2481e-01)\n",
      "Epoch: [14][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2087e-01 (8.2524e-01)\n",
      "Epoch: [14][3850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7524e-01 (8.2564e-01)\n",
      "Epoch: [14][3900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9225e-01 (8.2581e-01)\n",
      "Epoch: [14][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9699e-01 (8.2606e-01)\n",
      "Epoch: [14][4000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4607e-01 (8.2620e-01)\n",
      "Epoch: [14][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2490e-01 (8.2628e-01)\n",
      "Epoch: [14][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7649e-01 (8.2647e-01)\n",
      "Epoch: [14][4150/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4976e-01 (8.2645e-01)\n",
      "Epoch: [14][4200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8150e-01 (8.2638e-01)\n",
      "Epoch: [14][4250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7334e-01 (8.2652e-01)\n",
      "Epoch: [14][4300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9563e-01 (8.2687e-01)\n",
      "Epoch: [14][4350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3567e-01 (8.2702e-01)\n",
      "Epoch: [14][4400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8346e-01 (8.2701e-01)\n",
      "Epoch: [14][4450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5598e-01 (8.2711e-01)\n",
      "Epoch: [14][4500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8900e-01 (8.2707e-01)\n",
      "Epoch: [14][4550/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5203e-01 (8.2733e-01)\n",
      "Epoch: [14][4600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8636e-01 (8.2770e-01)\n",
      "Epoch: [14][4650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0868e-01 (8.2746e-01)\n",
      "Epoch: [14][4700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1517e-01 (8.2761e-01)\n",
      "Epoch: [14][4750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3002e-01 (8.2775e-01)\n",
      "Epoch: [14][4800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2992e-01 (8.2770e-01)\n",
      "Epoch: [14][4850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9899e-01 (8.2758e-01)\n",
      "Epoch: [14][4900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7470e-01 (8.2774e-01)\n",
      "Epoch: [14][4950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4134e-01 (8.2802e-01)\n",
      "Epoch: [14][5000/5005]\tTime  0.557 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1740e-01 (8.2825e-01)\n",
      "Test: [  0/196]\tTime  3.309 ( 3.309)\tLoss 5.6520e-01 (5.6520e-01)\tAcc@1  86.33 ( 86.33)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.378 ( 0.435)\tLoss 5.4753e-01 (7.4234e-01)\tAcc@1  86.33 ( 80.20)\tAcc@5  96.88 ( 95.33)\n",
      "Test: [100/196]\tTime  0.377 ( 0.406)\tLoss 1.4657e+00 (8.7125e-01)\tAcc@1  59.77 ( 77.24)\tAcc@5  88.28 ( 94.04)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 1.1353e+00 (9.8420e-01)\tAcc@1  76.17 ( 75.10)\tAcc@5  89.84 ( 92.61)\n",
      "epoch 14 0.8282691040765815 74.1259994506836 0.002500000000000001 4698510 0.20040132214688156\n",
      "Epoch: [15][   0/5005]\tTime  3.309 ( 3.309)\tData  2.750 ( 2.750)\tLoss 8.7139e-01 (8.7139e-01)\n",
      "Epoch: [15][  50/5005]\tTime  0.559 ( 0.612)\tData  0.000 ( 0.054)\tLoss 6.8443e-01 (8.0778e-01)\n",
      "Epoch: [15][ 100/5005]\tTime  0.558 ( 0.586)\tData  0.000 ( 0.027)\tLoss 6.3927e-01 (8.0055e-01)\n",
      "Epoch: [15][ 150/5005]\tTime  0.560 ( 0.577)\tData  0.000 ( 0.018)\tLoss 8.7495e-01 (7.9887e-01)\n",
      "Epoch: [15][ 200/5005]\tTime  0.561 ( 0.573)\tData  0.000 ( 0.014)\tLoss 8.7124e-01 (7.9802e-01)\n",
      "Epoch: [15][ 250/5005]\tTime  0.561 ( 0.570)\tData  0.000 ( 0.011)\tLoss 7.4869e-01 (7.9580e-01)\n",
      "Epoch: [15][ 300/5005]\tTime  0.561 ( 0.569)\tData  0.000 ( 0.009)\tLoss 6.9638e-01 (7.9425e-01)\n",
      "Epoch: [15][ 350/5005]\tTime  0.560 ( 0.567)\tData  0.000 ( 0.008)\tLoss 8.3886e-01 (7.9387e-01)\n",
      "Epoch: [15][ 400/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 7.0414e-01 (7.9385e-01)\n",
      "Epoch: [15][ 450/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.006)\tLoss 8.5747e-01 (7.9514e-01)\n",
      "Epoch: [15][ 500/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 7.9587e-01 (7.9479e-01)\n",
      "Epoch: [15][ 550/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.005)\tLoss 8.3075e-01 (7.9617e-01)\n",
      "Epoch: [15][ 600/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.005)\tLoss 7.8065e-01 (7.9827e-01)\n",
      "Epoch: [15][ 650/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.1840e-01 (7.9713e-01)\n",
      "Epoch: [15][ 700/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.2070e-01 (7.9777e-01)\n",
      "Epoch: [15][ 750/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 7.2043e-01 (7.9754e-01)\n",
      "Epoch: [15][ 800/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 7.5311e-01 (7.9754e-01)\n",
      "Epoch: [15][ 850/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.4463e-01 (7.9844e-01)\n",
      "Epoch: [15][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.1421e-01 (7.9838e-01)\n",
      "Epoch: [15][ 950/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0189e+00 (7.9717e-01)\n",
      "Epoch: [15][1000/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.4833e-01 (7.9709e-01)\n",
      "Epoch: [15][1050/5005]\tTime  0.561 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.9456e-01 (7.9654e-01)\n",
      "Epoch: [15][1100/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.4565e-01 (7.9651e-01)\n",
      "Epoch: [15][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.4550e-01 (7.9660e-01)\n",
      "Epoch: [15][1200/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.7647e-01 (7.9765e-01)\n",
      "Epoch: [15][1250/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.1872e-01 (7.9758e-01)\n",
      "Epoch: [15][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.7310e-01 (7.9763e-01)\n",
      "Epoch: [15][1350/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.0764e-01 (7.9761e-01)\n",
      "Epoch: [15][1400/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.6233e-01 (7.9822e-01)\n",
      "Epoch: [15][1450/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.7264e-01 (7.9828e-01)\n",
      "Epoch: [15][1500/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.8632e-01 (7.9788e-01)\n",
      "Epoch: [15][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.5816e-01 (7.9913e-01)\n",
      "Epoch: [15][1600/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.8508e-01 (7.9914e-01)\n",
      "Epoch: [15][1650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.8136e-01 (7.9924e-01)\n",
      "Epoch: [15][1700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.0284e-01 (8.0013e-01)\n",
      "Epoch: [15][1750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.2901e-01 (7.9986e-01)\n",
      "Epoch: [15][1800/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.0005e-01 (8.0045e-01)\n",
      "Epoch: [15][1850/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.9550e-01 (8.0084e-01)\n",
      "Epoch: [15][1900/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.8840e-01 (8.0083e-01)\n",
      "Epoch: [15][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.5604e-01 (8.0150e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [15][2000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.0394e-01 (8.0192e-01)\n",
      "Epoch: [15][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.5155e-01 (8.0172e-01)\n",
      "Epoch: [15][2100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 6.9524e-01 (8.0148e-01)\n",
      "Epoch: [15][2150/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7267e-01 (8.0162e-01)\n",
      "Epoch: [15][2200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7761e-01 (8.0128e-01)\n",
      "Epoch: [15][2250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0417e-01 (8.0158e-01)\n",
      "Epoch: [15][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8435e-01 (8.0170e-01)\n",
      "Epoch: [15][2350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3310e-01 (8.0220e-01)\n",
      "Epoch: [15][2400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.4703e-01 (8.0203e-01)\n",
      "Epoch: [15][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0524e-01 (8.0205e-01)\n",
      "Epoch: [15][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6559e-01 (8.0205e-01)\n",
      "Epoch: [15][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9423e-01 (8.0216e-01)\n",
      "Epoch: [15][2600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1484e-01 (8.0251e-01)\n",
      "Epoch: [15][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2168e-01 (8.0280e-01)\n",
      "Epoch: [15][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0544e-01 (8.0267e-01)\n",
      "Epoch: [15][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8811e-01 (8.0282e-01)\n",
      "Epoch: [15][2800/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7881e-01 (8.0318e-01)\n",
      "Epoch: [15][2850/5005]\tTime  0.569 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8742e-01 (8.0373e-01)\n",
      "Epoch: [15][2900/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3639e-01 (8.0398e-01)\n",
      "Epoch: [15][2950/5005]\tTime  0.566 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0765e-01 (8.0400e-01)\n",
      "Epoch: [15][3000/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2362e-01 (8.0404e-01)\n",
      "Epoch: [15][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3133e-01 (8.0417e-01)\n",
      "Epoch: [15][3100/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8501e-01 (8.0421e-01)\n",
      "Epoch: [15][3150/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6395e-01 (8.0422e-01)\n",
      "Epoch: [15][3200/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3473e-01 (8.0439e-01)\n",
      "Epoch: [15][3250/5005]\tTime  0.570 ( 0.560)\tData  0.001 ( 0.001)\tLoss 8.5452e-01 (8.0469e-01)\n",
      "Epoch: [15][3300/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.6913e-01 (8.0477e-01)\n",
      "Epoch: [15][3350/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6451e-01 (8.0457e-01)\n",
      "Epoch: [15][3400/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5683e-01 (8.0489e-01)\n",
      "Epoch: [15][3450/5005]\tTime  0.571 ( 0.560)\tData  0.001 ( 0.001)\tLoss 8.7551e-01 (8.0480e-01)\n",
      "Epoch: [15][3500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.2591e-01 (8.0504e-01)\n",
      "Epoch: [15][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3773e-01 (8.0504e-01)\n",
      "Epoch: [15][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1491e-01 (8.0557e-01)\n",
      "Epoch: [15][3650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5202e-01 (8.0550e-01)\n",
      "Epoch: [15][3700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.7250e-01 (8.0529e-01)\n",
      "Epoch: [15][3750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4910e-01 (8.0550e-01)\n",
      "Epoch: [15][3800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6311e-01 (8.0560e-01)\n",
      "Epoch: [15][3850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6829e-01 (8.0579e-01)\n",
      "Epoch: [15][3900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0659e-01 (8.0618e-01)\n",
      "Epoch: [15][3950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6766e-01 (8.0660e-01)\n",
      "Epoch: [15][4000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.2879e-01 (8.0630e-01)\n",
      "Epoch: [15][4050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9084e-01 (8.0617e-01)\n",
      "Epoch: [15][4100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0110e-01 (8.0630e-01)\n",
      "Epoch: [15][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5309e-01 (8.0646e-01)\n",
      "Epoch: [15][4200/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8085e-01 (8.0663e-01)\n",
      "Epoch: [15][4250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.5310e-01 (8.0669e-01)\n",
      "Epoch: [15][4300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6361e-01 (8.0663e-01)\n",
      "Epoch: [15][4350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4371e-01 (8.0678e-01)\n",
      "Epoch: [15][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3639e-01 (8.0705e-01)\n",
      "Epoch: [15][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5765e-01 (8.0730e-01)\n",
      "Epoch: [15][4500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9108e-01 (8.0752e-01)\n",
      "Epoch: [15][4550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3854e-01 (8.0766e-01)\n",
      "Epoch: [15][4600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0848e-01 (8.0765e-01)\n",
      "Epoch: [15][4650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5706e-01 (8.0798e-01)\n",
      "Epoch: [15][4700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3321e-01 (8.0828e-01)\n",
      "Epoch: [15][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0825e-01 (8.0843e-01)\n",
      "Epoch: [15][4800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5443e-01 (8.0846e-01)\n",
      "Epoch: [15][4850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.6342e-01 (8.0837e-01)\n",
      "Epoch: [15][4900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0872e-01 (8.0840e-01)\n",
      "Epoch: [15][4950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9219e-01 (8.0833e-01)\n",
      "Epoch: [15][5000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9370e-01 (8.0861e-01)\n",
      "Test: [  0/196]\tTime  3.333 ( 3.333)\tLoss 5.5350e-01 (5.5350e-01)\tAcc@1  87.50 ( 87.50)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.378 ( 0.435)\tLoss 4.4940e-01 (7.3269e-01)\tAcc@1  88.28 ( 80.64)\tAcc@5  97.27 ( 95.28)\n",
      "Test: [100/196]\tTime  0.378 ( 0.407)\tLoss 1.4314e+00 (8.5883e-01)\tAcc@1  61.33 ( 77.64)\tAcc@5  85.94 ( 94.04)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 1.0968e+00 (9.7137e-01)\tAcc@1  75.78 ( 75.44)\tAcc@5  90.62 ( 92.66)\n",
      "epoch 15 0.8086396850111386 74.53199768066406 0.0020000000000000005 4698510 0.20040132214688156\n",
      "Epoch: [16][   0/5005]\tTime  3.035 ( 3.035)\tData  2.470 ( 2.470)\tLoss 7.3554e-01 (7.3554e-01)\n",
      "Epoch: [16][  50/5005]\tTime  0.559 ( 0.607)\tData  0.000 ( 0.049)\tLoss 7.7345e-01 (7.8313e-01)\n",
      "Epoch: [16][ 100/5005]\tTime  0.559 ( 0.583)\tData  0.000 ( 0.025)\tLoss 7.4388e-01 (7.8826e-01)\n",
      "Epoch: [16][ 150/5005]\tTime  0.559 ( 0.575)\tData  0.000 ( 0.017)\tLoss 8.8521e-01 (7.9289e-01)\n",
      "Epoch: [16][ 200/5005]\tTime  0.559 ( 0.571)\tData  0.000 ( 0.012)\tLoss 7.5004e-01 (7.8488e-01)\n",
      "Epoch: [16][ 250/5005]\tTime  0.558 ( 0.568)\tData  0.000 ( 0.010)\tLoss 7.4219e-01 (7.7942e-01)\n",
      "Epoch: [16][ 300/5005]\tTime  0.558 ( 0.567)\tData  0.000 ( 0.008)\tLoss 8.3820e-01 (7.8115e-01)\n",
      "Epoch: [16][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 9.4924e-01 (7.8362e-01)\n",
      "Epoch: [16][ 400/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.006)\tLoss 8.9055e-01 (7.8352e-01)\n",
      "Epoch: [16][ 450/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.006)\tLoss 8.3539e-01 (7.8335e-01)\n",
      "Epoch: [16][ 500/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.005)\tLoss 6.8877e-01 (7.8252e-01)\n",
      "Epoch: [16][ 550/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.005)\tLoss 6.7915e-01 (7.8286e-01)\n",
      "Epoch: [16][ 600/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.5245e-01 (7.8480e-01)\n",
      "Epoch: [16][ 650/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 7.0024e-01 (7.8342e-01)\n",
      "Epoch: [16][ 700/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 6.9396e-01 (7.8232e-01)\n",
      "Epoch: [16][ 750/5005]\tTime  0.561 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.6087e-01 (7.8116e-01)\n",
      "Epoch: [16][ 800/5005]\tTime  0.561 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.1656e-01 (7.8076e-01)\n",
      "Epoch: [16][ 850/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.4298e-01 (7.8003e-01)\n",
      "Epoch: [16][ 900/5005]\tTime  0.561 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.8171e-01 (7.7876e-01)\n",
      "Epoch: [16][ 950/5005]\tTime  0.561 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.3198e-01 (7.7893e-01)\n",
      "Epoch: [16][1000/5005]\tTime  0.561 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.5331e-01 (7.7839e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [16][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.6731e-01 (7.7894e-01)\n",
      "Epoch: [16][1100/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.002)\tLoss 9.7354e-01 (7.7964e-01)\n",
      "Epoch: [16][1150/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.5428e-01 (7.7994e-01)\n",
      "Epoch: [16][1200/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.6317e-01 (7.8008e-01)\n",
      "Epoch: [16][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 5.9824e-01 (7.8026e-01)\n",
      "Epoch: [16][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.1940e-01 (7.7943e-01)\n",
      "Epoch: [16][1350/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.6015e-01 (7.7989e-01)\n",
      "Epoch: [16][1400/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.3941e-01 (7.7931e-01)\n",
      "Epoch: [16][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 5.4901e-01 (7.7970e-01)\n",
      "Epoch: [16][1500/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.8743e-01 (7.7939e-01)\n",
      "Epoch: [16][1550/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1645e-01 (7.8026e-01)\n",
      "Epoch: [16][1600/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.4216e-01 (7.8105e-01)\n",
      "Epoch: [16][1650/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.8428e-01 (7.8059e-01)\n",
      "Epoch: [16][1700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 6.6601e-01 (7.8110e-01)\n",
      "Epoch: [16][1750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.0231e-01 (7.8152e-01)\n",
      "Epoch: [16][1800/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0014e+00 (7.8129e-01)\n",
      "Epoch: [16][1850/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.6651e-01 (7.8116e-01)\n",
      "Epoch: [16][1900/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.2325e-01 (7.8113e-01)\n",
      "Epoch: [16][1950/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.5085e-01 (7.8157e-01)\n",
      "Epoch: [16][2000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.3581e-01 (7.8134e-01)\n",
      "Epoch: [16][2050/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0541e-01 (7.8172e-01)\n",
      "Epoch: [16][2100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4541e-01 (7.8127e-01)\n",
      "Epoch: [16][2150/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 5.9280e-01 (7.8152e-01)\n",
      "Epoch: [16][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0371e-01 (7.8202e-01)\n",
      "Epoch: [16][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2596e-01 (7.8201e-01)\n",
      "Epoch: [16][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 5.9604e-01 (7.8175e-01)\n",
      "Epoch: [16][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3324e-01 (7.8209e-01)\n",
      "Epoch: [16][2400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7746e-01 (7.8254e-01)\n",
      "Epoch: [16][2450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1729e-01 (7.8275e-01)\n",
      "Epoch: [16][2500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0097e-01 (7.8310e-01)\n",
      "Epoch: [16][2550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6203e-01 (7.8329e-01)\n",
      "Epoch: [16][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2151e-01 (7.8363e-01)\n",
      "Epoch: [16][2650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0829e-01 (7.8398e-01)\n",
      "Epoch: [16][2700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 5.5710e-01 (7.8397e-01)\n",
      "Epoch: [16][2750/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4054e-01 (7.8427e-01)\n",
      "Epoch: [16][2800/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6032e-01 (7.8431e-01)\n",
      "Epoch: [16][2850/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3027e-01 (7.8465e-01)\n",
      "Epoch: [16][2900/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6661e-01 (7.8489e-01)\n",
      "Epoch: [16][2950/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0412e-01 (7.8500e-01)\n",
      "Epoch: [16][3000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0424e-01 (7.8486e-01)\n",
      "Epoch: [16][3050/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0557e-01 (7.8481e-01)\n",
      "Epoch: [16][3100/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5967e-01 (7.8461e-01)\n",
      "Epoch: [16][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7731e-01 (7.8464e-01)\n",
      "Epoch: [16][3200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0021e+00 (7.8500e-01)\n",
      "Epoch: [16][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6417e-01 (7.8461e-01)\n",
      "Epoch: [16][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5022e-01 (7.8493e-01)\n",
      "Epoch: [16][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5372e-01 (7.8497e-01)\n",
      "Epoch: [16][3400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1947e-01 (7.8485e-01)\n",
      "Epoch: [16][3450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3429e-01 (7.8480e-01)\n",
      "Epoch: [16][3500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9046e-01 (7.8484e-01)\n",
      "Epoch: [16][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4273e-01 (7.8482e-01)\n",
      "Epoch: [16][3600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7703e-01 (7.8462e-01)\n",
      "Epoch: [16][3650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.7513e-01 (7.8485e-01)\n",
      "Epoch: [16][3700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4780e-01 (7.8468e-01)\n",
      "Epoch: [16][3750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1654e-01 (7.8505e-01)\n",
      "Epoch: [16][3800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9873e-01 (7.8505e-01)\n",
      "Epoch: [16][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7964e-01 (7.8529e-01)\n",
      "Epoch: [16][3900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3148e-01 (7.8524e-01)\n",
      "Epoch: [16][3950/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.8538e-01 (7.8551e-01)\n",
      "Epoch: [16][4000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1659e-01 (7.8564e-01)\n",
      "Epoch: [16][4050/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6546e-01 (7.8571e-01)\n",
      "Epoch: [16][4100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3968e-01 (7.8566e-01)\n",
      "Epoch: [16][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4436e-01 (7.8592e-01)\n",
      "Epoch: [16][4200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9452e-01 (7.8595e-01)\n",
      "Epoch: [16][4250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9853e-01 (7.8605e-01)\n",
      "Epoch: [16][4300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8625e-01 (7.8604e-01)\n",
      "Epoch: [16][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.7525e-01 (7.8627e-01)\n",
      "Epoch: [16][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2169e-01 (7.8638e-01)\n",
      "Epoch: [16][4450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5519e-01 (7.8646e-01)\n",
      "Epoch: [16][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8614e-01 (7.8678e-01)\n",
      "Epoch: [16][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6972e-01 (7.8714e-01)\n",
      "Epoch: [16][4600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5506e-01 (7.8739e-01)\n",
      "Epoch: [16][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8887e-01 (7.8748e-01)\n",
      "Epoch: [16][4700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7886e-01 (7.8755e-01)\n",
      "Epoch: [16][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7875e-01 (7.8740e-01)\n",
      "Epoch: [16][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0639e-01 (7.8737e-01)\n",
      "Epoch: [16][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9166e-01 (7.8744e-01)\n",
      "Epoch: [16][4900/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4517e-01 (7.8737e-01)\n",
      "Epoch: [16][4950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6258e-01 (7.8717e-01)\n",
      "Epoch: [16][5000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4940e-01 (7.8737e-01)\n",
      "Test: [  0/196]\tTime  3.529 ( 3.529)\tLoss 5.0624e-01 (5.0624e-01)\tAcc@1  86.33 ( 86.33)\tAcc@5  98.05 ( 98.05)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.439)\tLoss 4.2435e-01 (7.1869e-01)\tAcc@1  88.67 ( 81.04)\tAcc@5  97.27 ( 95.63)\n",
      "Test: [100/196]\tTime  0.377 ( 0.408)\tLoss 1.3344e+00 (8.4933e-01)\tAcc@1  61.72 ( 77.95)\tAcc@5  88.67 ( 94.22)\n",
      "Test: [150/196]\tTime  0.377 ( 0.398)\tLoss 1.0634e+00 (9.6151e-01)\tAcc@1  77.73 ( 75.81)\tAcc@5  91.02 ( 92.78)\n",
      "epoch 16 0.7873446162774891 74.83999633789062 0.0015000000000000011 4698510 0.20040132214688156\n",
      "Epoch: [17][   0/5005]\tTime  2.800 ( 2.800)\tData  2.240 ( 2.240)\tLoss 8.5501e-01 (8.5501e-01)\n",
      "Epoch: [17][  50/5005]\tTime  0.558 ( 0.603)\tData  0.000 ( 0.044)\tLoss 7.6510e-01 (7.6734e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [17][ 100/5005]\tTime  0.558 ( 0.581)\tData  0.000 ( 0.022)\tLoss 6.8795e-01 (7.6921e-01)\n",
      "Epoch: [17][ 150/5005]\tTime  0.559 ( 0.573)\tData  0.000 ( 0.015)\tLoss 6.5588e-01 (7.7128e-01)\n",
      "Epoch: [17][ 200/5005]\tTime  0.558 ( 0.570)\tData  0.000 ( 0.011)\tLoss 8.2722e-01 (7.6364e-01)\n",
      "Epoch: [17][ 250/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.009)\tLoss 8.4938e-01 (7.6351e-01)\n",
      "Epoch: [17][ 300/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.008)\tLoss 8.8848e-01 (7.6511e-01)\n",
      "Epoch: [17][ 350/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.007)\tLoss 8.0716e-01 (7.6455e-01)\n",
      "Epoch: [17][ 400/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.006)\tLoss 7.3322e-01 (7.6700e-01)\n",
      "Epoch: [17][ 450/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.005)\tLoss 7.9011e-01 (7.6739e-01)\n",
      "Epoch: [17][ 500/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.005)\tLoss 7.3769e-01 (7.6815e-01)\n",
      "Epoch: [17][ 550/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 6.8866e-01 (7.6809e-01)\n",
      "Epoch: [17][ 600/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.1435e-01 (7.6593e-01)\n",
      "Epoch: [17][ 650/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 7.7134e-01 (7.6471e-01)\n",
      "Epoch: [17][ 700/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 6.7409e-01 (7.6526e-01)\n",
      "Epoch: [17][ 750/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.1243e-01 (7.6525e-01)\n",
      "Epoch: [17][ 800/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.5305e-01 (7.6544e-01)\n",
      "Epoch: [17][ 850/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.7778e-01 (7.6657e-01)\n",
      "Epoch: [17][ 900/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 8.6324e-01 (7.6537e-01)\n",
      "Epoch: [17][ 950/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 9.2405e-01 (7.6591e-01)\n",
      "Epoch: [17][1000/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.3481e-01 (7.6572e-01)\n",
      "Epoch: [17][1050/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.6699e-01 (7.6649e-01)\n",
      "Epoch: [17][1100/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.6334e-01 (7.6727e-01)\n",
      "Epoch: [17][1150/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1668e-01 (7.6635e-01)\n",
      "Epoch: [17][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.4150e-01 (7.6680e-01)\n",
      "Epoch: [17][1250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.0307e-01 (7.6650e-01)\n",
      "Epoch: [17][1300/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.2449e-01 (7.6657e-01)\n",
      "Epoch: [17][1350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.9388e-01 (7.6734e-01)\n",
      "Epoch: [17][1400/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.002)\tLoss 7.2973e-01 (7.6747e-01)\n",
      "Epoch: [17][1450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.5989e-01 (7.6718e-01)\n",
      "Epoch: [17][1500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 6.7302e-01 (7.6667e-01)\n",
      "Epoch: [17][1550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 6.9360e-01 (7.6643e-01)\n",
      "Epoch: [17][1600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 5.9291e-01 (7.6597e-01)\n",
      "Epoch: [17][1650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 6.7259e-01 (7.6611e-01)\n",
      "Epoch: [17][1700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 8.1917e-01 (7.6571e-01)\n",
      "Epoch: [17][1750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9528e-01 (7.6548e-01)\n",
      "Epoch: [17][1800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0673e-01 (7.6559e-01)\n",
      "Epoch: [17][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4929e-01 (7.6519e-01)\n",
      "Epoch: [17][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4597e-01 (7.6544e-01)\n",
      "Epoch: [17][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9756e-01 (7.6506e-01)\n",
      "Epoch: [17][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2260e-01 (7.6496e-01)\n",
      "Epoch: [17][2050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1742e-01 (7.6547e-01)\n",
      "Epoch: [17][2100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7923e-01 (7.6576e-01)\n",
      "Epoch: [17][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0295e-01 (7.6557e-01)\n",
      "Epoch: [17][2200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4693e-01 (7.6587e-01)\n",
      "Epoch: [17][2250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1040e-01 (7.6591e-01)\n",
      "Epoch: [17][2300/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5993e-01 (7.6607e-01)\n",
      "Epoch: [17][2350/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4363e-01 (7.6613e-01)\n",
      "Epoch: [17][2400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7273e-01 (7.6613e-01)\n",
      "Epoch: [17][2450/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2246e-01 (7.6580e-01)\n",
      "Epoch: [17][2500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2568e-01 (7.6578e-01)\n",
      "Epoch: [17][2550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5052e-01 (7.6579e-01)\n",
      "Epoch: [17][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3452e-01 (7.6623e-01)\n",
      "Epoch: [17][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4615e-01 (7.6625e-01)\n",
      "Epoch: [17][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9083e-01 (7.6590e-01)\n",
      "Epoch: [17][2750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8583e-01 (7.6606e-01)\n",
      "Epoch: [17][2800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4997e-01 (7.6614e-01)\n",
      "Epoch: [17][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7216e-01 (7.6613e-01)\n",
      "Epoch: [17][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 5.9854e-01 (7.6630e-01)\n",
      "Epoch: [17][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4974e-01 (7.6635e-01)\n",
      "Epoch: [17][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7085e-01 (7.6631e-01)\n",
      "Epoch: [17][3050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3086e-01 (7.6623e-01)\n",
      "Epoch: [17][3100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8021e-01 (7.6608e-01)\n",
      "Epoch: [17][3150/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9754e-01 (7.6613e-01)\n",
      "Epoch: [17][3200/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3667e-01 (7.6634e-01)\n",
      "Epoch: [17][3250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.8879e-01 (7.6621e-01)\n",
      "Epoch: [17][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1981e-01 (7.6661e-01)\n",
      "Epoch: [17][3350/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.2483e-01 (7.6698e-01)\n",
      "Epoch: [17][3400/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.6612e-01 (7.6709e-01)\n",
      "Epoch: [17][3450/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.7724e-01 (7.6718e-01)\n",
      "Epoch: [17][3500/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 6.7072e-01 (7.6723e-01)\n",
      "Epoch: [17][3550/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.4158e-01 (7.6718e-01)\n",
      "Epoch: [17][3600/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.1090e-01 (7.6706e-01)\n",
      "Epoch: [17][3650/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 6.8204e-01 (7.6707e-01)\n",
      "Epoch: [17][3700/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.6270e-01 (7.6710e-01)\n",
      "Epoch: [17][3750/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.0886e-01 (7.6693e-01)\n",
      "Epoch: [17][3800/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.9994e-01 (7.6714e-01)\n",
      "Epoch: [17][3850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 6.8413e-01 (7.6727e-01)\n",
      "Epoch: [17][3900/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.0439e-01 (7.6765e-01)\n",
      "Epoch: [17][3950/5005]\tTime  0.560 ( 0.559)\tData  0.000 ( 0.001)\tLoss 6.6042e-01 (7.6763e-01)\n",
      "Epoch: [17][4000/5005]\tTime  0.567 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.9992e-01 (7.6768e-01)\n",
      "Epoch: [17][4050/5005]\tTime  0.562 ( 0.559)\tData  0.000 ( 0.001)\tLoss 6.8523e-01 (7.6768e-01)\n",
      "Epoch: [17][4100/5005]\tTime  0.562 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.1376e-01 (7.6762e-01)\n",
      "Epoch: [17][4150/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 6.7987e-01 (7.6756e-01)\n",
      "Epoch: [17][4200/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.3912e-01 (7.6739e-01)\n",
      "Epoch: [17][4250/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.8872e-01 (7.6769e-01)\n",
      "Epoch: [17][4300/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 6.9402e-01 (7.6775e-01)\n",
      "Epoch: [17][4350/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 9.0512e-01 (7.6774e-01)\n",
      "Epoch: [17][4400/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.3208e-01 (7.6778e-01)\n",
      "Epoch: [17][4450/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.7142e-01 (7.6784e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [17][4500/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.9351e-01 (7.6801e-01)\n",
      "Epoch: [17][4550/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.7050e-01 (7.6780e-01)\n",
      "Epoch: [17][4600/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 6.4788e-01 (7.6793e-01)\n",
      "Epoch: [17][4650/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.8464e-01 (7.6803e-01)\n",
      "Epoch: [17][4700/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.7276e-01 (7.6803e-01)\n",
      "Epoch: [17][4750/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.8922e-01 (7.6820e-01)\n",
      "Epoch: [17][4800/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 8.5940e-01 (7.6827e-01)\n",
      "Epoch: [17][4850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.9251e-01 (7.6839e-01)\n",
      "Epoch: [17][4900/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 6.8060e-01 (7.6821e-01)\n",
      "Epoch: [17][4950/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.6958e-01 (7.6836e-01)\n",
      "Epoch: [17][5000/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 7.4938e-01 (7.6819e-01)\n",
      "Test: [  0/196]\tTime  3.551 ( 3.551)\tLoss 5.1620e-01 (5.1620e-01)\tAcc@1  87.11 ( 87.11)\tAcc@5  96.09 ( 96.09)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.439)\tLoss 3.9518e-01 (7.1416e-01)\tAcc@1  90.23 ( 81.02)\tAcc@5  97.66 ( 95.60)\n",
      "Test: [100/196]\tTime  0.377 ( 0.409)\tLoss 1.2851e+00 (8.4414e-01)\tAcc@1  64.45 ( 78.03)\tAcc@5  90.23 ( 94.30)\n",
      "Test: [150/196]\tTime  0.377 ( 0.398)\tLoss 1.0655e+00 (9.5463e-01)\tAcc@1  76.17 ( 75.87)\tAcc@5  91.02 ( 92.83)\n",
      "epoch 17 0.7682099506785648 74.947998046875 0.0010000000000000005 4698510 0.20040132214688156\n",
      "Epoch: [18][   0/5005]\tTime  3.201 ( 3.201)\tData  2.642 ( 2.642)\tLoss 7.5539e-01 (7.5539e-01)\n",
      "Epoch: [18][  50/5005]\tTime  0.558 ( 0.610)\tData  0.000 ( 0.052)\tLoss 9.0519e-01 (7.6026e-01)\n",
      "Epoch: [18][ 100/5005]\tTime  0.558 ( 0.585)\tData  0.000 ( 0.026)\tLoss 7.3120e-01 (7.5516e-01)\n",
      "Epoch: [18][ 150/5005]\tTime  0.559 ( 0.576)\tData  0.000 ( 0.018)\tLoss 7.8981e-01 (7.4461e-01)\n",
      "Epoch: [18][ 200/5005]\tTime  0.558 ( 0.572)\tData  0.000 ( 0.013)\tLoss 6.1884e-01 (7.4014e-01)\n",
      "Epoch: [18][ 250/5005]\tTime  0.558 ( 0.569)\tData  0.000 ( 0.011)\tLoss 6.0060e-01 (7.3905e-01)\n",
      "Epoch: [18][ 300/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.009)\tLoss 8.0014e-01 (7.4034e-01)\n",
      "Epoch: [18][ 350/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.008)\tLoss 6.9776e-01 (7.3838e-01)\n",
      "Epoch: [18][ 400/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.007)\tLoss 8.4327e-01 (7.4078e-01)\n",
      "Epoch: [18][ 450/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.006)\tLoss 9.1054e-01 (7.4358e-01)\n",
      "Epoch: [18][ 500/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.005)\tLoss 5.2834e-01 (7.4300e-01)\n",
      "Epoch: [18][ 550/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.005)\tLoss 7.0016e-01 (7.4586e-01)\n",
      "Epoch: [18][ 600/5005]\tTime  0.567 ( 0.563)\tData  0.000 ( 0.005)\tLoss 8.4910e-01 (7.4563e-01)\n",
      "Epoch: [18][ 650/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 7.1496e-01 (7.4659e-01)\n",
      "Epoch: [18][ 700/5005]\tTime  0.561 ( 0.563)\tData  0.000 ( 0.004)\tLoss 6.8741e-01 (7.4679e-01)\n",
      "Epoch: [18][ 750/5005]\tTime  0.568 ( 0.563)\tData  0.000 ( 0.004)\tLoss 7.1644e-01 (7.4656e-01)\n",
      "Epoch: [18][ 800/5005]\tTime  0.562 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.2015e-01 (7.4645e-01)\n",
      "Epoch: [18][ 850/5005]\tTime  0.559 ( 0.563)\tData  0.001 ( 0.003)\tLoss 6.0472e-01 (7.4649e-01)\n",
      "Epoch: [18][ 900/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.003)\tLoss 9.3267e-01 (7.4672e-01)\n",
      "Epoch: [18][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 6.4391e-01 (7.4679e-01)\n",
      "Epoch: [18][1000/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.5479e-01 (7.4678e-01)\n",
      "Epoch: [18][1050/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.8186e-01 (7.4719e-01)\n",
      "Epoch: [18][1100/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.8540e-01 (7.4662e-01)\n",
      "Epoch: [18][1150/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.6868e-01 (7.4724e-01)\n",
      "Epoch: [18][1200/5005]\tTime  0.561 ( 0.562)\tData  0.000 ( 0.002)\tLoss 7.6662e-01 (7.4766e-01)\n",
      "Epoch: [18][1250/5005]\tTime  0.561 ( 0.562)\tData  0.000 ( 0.002)\tLoss 6.6028e-01 (7.4761e-01)\n",
      "Epoch: [18][1300/5005]\tTime  0.563 ( 0.562)\tData  0.000 ( 0.002)\tLoss 6.3718e-01 (7.4700e-01)\n",
      "Epoch: [18][1350/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.002)\tLoss 6.6911e-01 (7.4730e-01)\n",
      "Epoch: [18][1400/5005]\tTime  0.562 ( 0.562)\tData  0.000 ( 0.002)\tLoss 7.1916e-01 (7.4650e-01)\n",
      "Epoch: [18][1450/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.002)\tLoss 7.2428e-01 (7.4625e-01)\n",
      "Epoch: [18][1500/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.002)\tLoss 7.1942e-01 (7.4555e-01)\n",
      "Epoch: [18][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.9233e-01 (7.4530e-01)\n",
      "Epoch: [18][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.4526e-01 (7.4530e-01)\n",
      "Epoch: [18][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.9355e-01 (7.4544e-01)\n",
      "Epoch: [18][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.3988e-01 (7.4494e-01)\n",
      "Epoch: [18][1750/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.4616e-01 (7.4553e-01)\n",
      "Epoch: [18][1800/5005]\tTime  0.570 ( 0.561)\tData  0.001 ( 0.002)\tLoss 7.1906e-01 (7.4581e-01)\n",
      "Epoch: [18][1850/5005]\tTime  0.562 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.8721e-01 (7.4598e-01)\n",
      "Epoch: [18][1900/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.9819e-01 (7.4584e-01)\n",
      "Epoch: [18][1950/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.8221e-01 (7.4602e-01)\n",
      "Epoch: [18][2000/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.2301e-01 (7.4584e-01)\n",
      "Epoch: [18][2050/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.9938e-01 (7.4553e-01)\n",
      "Epoch: [18][2100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 6.6383e-01 (7.4594e-01)\n",
      "Epoch: [18][2150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 7.4213e-01 (7.4626e-01)\n",
      "Epoch: [18][2200/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.001)\tLoss 6.2423e-01 (7.4643e-01)\n",
      "Epoch: [18][2250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 7.1549e-01 (7.4639e-01)\n",
      "Epoch: [18][2300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 7.9387e-01 (7.4619e-01)\n",
      "Epoch: [18][2350/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.001)\tLoss 8.0311e-01 (7.4611e-01)\n",
      "Epoch: [18][2400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 7.0633e-01 (7.4617e-01)\n",
      "Epoch: [18][2450/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.001)\tLoss 6.2953e-01 (7.4645e-01)\n",
      "Epoch: [18][2500/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.001)\tLoss 7.8294e-01 (7.4623e-01)\n",
      "Epoch: [18][2550/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.001)\tLoss 7.0899e-01 (7.4624e-01)\n",
      "Epoch: [18][2600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 5.4221e-01 (7.4632e-01)\n",
      "Epoch: [18][2650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 7.9478e-01 (7.4661e-01)\n",
      "Epoch: [18][2700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0791e-01 (7.4668e-01)\n",
      "Epoch: [18][2750/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2923e-01 (7.4701e-01)\n",
      "Epoch: [18][2800/5005]\tTime  0.561 ( 0.560)\tData  0.001 ( 0.001)\tLoss 7.0293e-01 (7.4717e-01)\n",
      "Epoch: [18][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0376e-01 (7.4680e-01)\n",
      "Epoch: [18][2900/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5078e-01 (7.4672e-01)\n",
      "Epoch: [18][2950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0308e+00 (7.4645e-01)\n",
      "Epoch: [18][3000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2108e-01 (7.4635e-01)\n",
      "Epoch: [18][3050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1274e-01 (7.4647e-01)\n",
      "Epoch: [18][3100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.8442e-01 (7.4659e-01)\n",
      "Epoch: [18][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4033e-01 (7.4663e-01)\n",
      "Epoch: [18][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.7978e-01 (7.4675e-01)\n",
      "Epoch: [18][3250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.0070e-01 (7.4644e-01)\n",
      "Epoch: [18][3300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5390e-01 (7.4644e-01)\n",
      "Epoch: [18][3350/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7978e-01 (7.4678e-01)\n",
      "Epoch: [18][3400/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6415e-01 (7.4656e-01)\n",
      "Epoch: [18][3450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4022e-01 (7.4664e-01)\n",
      "Epoch: [18][3500/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1280e-01 (7.4688e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [18][3550/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4037e-01 (7.4643e-01)\n",
      "Epoch: [18][3600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2831e-01 (7.4652e-01)\n",
      "Epoch: [18][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8507e-01 (7.4655e-01)\n",
      "Epoch: [18][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9964e-01 (7.4646e-01)\n",
      "Epoch: [18][3750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.7636e-01 (7.4624e-01)\n",
      "Epoch: [18][3800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4982e-01 (7.4631e-01)\n",
      "Epoch: [18][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5131e-01 (7.4610e-01)\n",
      "Epoch: [18][3900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3028e-01 (7.4616e-01)\n",
      "Epoch: [18][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6968e-01 (7.4617e-01)\n",
      "Epoch: [18][4000/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1373e-01 (7.4607e-01)\n",
      "Epoch: [18][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1224e-01 (7.4641e-01)\n",
      "Epoch: [18][4100/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6351e-01 (7.4674e-01)\n",
      "Epoch: [18][4150/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7880e-01 (7.4695e-01)\n",
      "Epoch: [18][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9670e-01 (7.4692e-01)\n",
      "Epoch: [18][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8990e-01 (7.4701e-01)\n",
      "Epoch: [18][4300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2898e-01 (7.4724e-01)\n",
      "Epoch: [18][4350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4476e-01 (7.4722e-01)\n",
      "Epoch: [18][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9317e-01 (7.4756e-01)\n",
      "Epoch: [18][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5573e-01 (7.4758e-01)\n",
      "Epoch: [18][4500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3734e-01 (7.4770e-01)\n",
      "Epoch: [18][4550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.5439e-01 (7.4759e-01)\n",
      "Epoch: [18][4600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.8572e-01 (7.4755e-01)\n",
      "Epoch: [18][4650/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.5481e-01 (7.4764e-01)\n",
      "Epoch: [18][4700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0427e-01 (7.4776e-01)\n",
      "Epoch: [18][4750/5005]\tTime  0.561 ( 0.560)\tData  0.001 ( 0.001)\tLoss 8.5585e-01 (7.4783e-01)\n",
      "Epoch: [18][4800/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.5069e-01 (7.4781e-01)\n",
      "Epoch: [18][4850/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 5.5132e-01 (7.4799e-01)\n",
      "Epoch: [18][4900/5005]\tTime  0.567 ( 0.560)\tData  0.001 ( 0.001)\tLoss 6.9041e-01 (7.4801e-01)\n",
      "Epoch: [18][4950/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6887e-01 (7.4794e-01)\n",
      "Epoch: [18][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.3619e-01 (7.4784e-01)\n",
      "Test: [  0/196]\tTime  3.871 ( 3.871)\tLoss 5.4037e-01 (5.4037e-01)\tAcc@1  85.55 ( 85.55)\tAcc@5  96.48 ( 96.48)\n",
      "Test: [ 50/196]\tTime  0.379 ( 0.446)\tLoss 4.5497e-01 (7.0010e-01)\tAcc@1  88.28 ( 81.50)\tAcc@5  97.66 ( 95.67)\n",
      "Test: [100/196]\tTime  0.378 ( 0.413)\tLoss 1.2280e+00 (8.3280e-01)\tAcc@1  69.14 ( 78.29)\tAcc@5  89.45 ( 94.35)\n",
      "Test: [150/196]\tTime  0.378 ( 0.401)\tLoss 1.0586e+00 (9.4328e-01)\tAcc@1  77.73 ( 76.06)\tAcc@5  91.80 ( 92.98)\n",
      "epoch 18 0.7478239465766618 75.22200012207031 0.0005000000000000008 4698510 0.20040132214688156\n",
      "Epoch: [19][   0/5005]\tTime  2.971 ( 2.971)\tData  2.410 ( 2.410)\tLoss 6.2599e-01 (6.2599e-01)\n",
      "Epoch: [19][  50/5005]\tTime  0.559 ( 0.611)\tData  0.001 ( 0.048)\tLoss 7.6390e-01 (7.1631e-01)\n",
      "Epoch: [19][ 100/5005]\tTime  0.558 ( 0.585)\tData  0.000 ( 0.024)\tLoss 8.2033e-01 (7.2436e-01)\n",
      "Epoch: [19][ 150/5005]\tTime  0.558 ( 0.576)\tData  0.000 ( 0.016)\tLoss 7.5027e-01 (7.2518e-01)\n",
      "Epoch: [19][ 200/5005]\tTime  0.558 ( 0.572)\tData  0.000 ( 0.012)\tLoss 8.7917e-01 (7.3519e-01)\n",
      "Epoch: [19][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.010)\tLoss 8.6935e-01 (7.3377e-01)\n",
      "Epoch: [19][ 300/5005]\tTime  0.558 ( 0.567)\tData  0.000 ( 0.008)\tLoss 7.6661e-01 (7.3216e-01)\n",
      "Epoch: [19][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 5.9914e-01 (7.3226e-01)\n",
      "Epoch: [19][ 400/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.006)\tLoss 5.9424e-01 (7.3210e-01)\n",
      "Epoch: [19][ 450/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.006)\tLoss 7.8574e-01 (7.3105e-01)\n",
      "Epoch: [19][ 500/5005]\tTime  0.568 ( 0.564)\tData  0.001 ( 0.005)\tLoss 7.1791e-01 (7.3102e-01)\n",
      "Epoch: [19][ 550/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.005)\tLoss 6.1265e-01 (7.3015e-01)\n",
      "Epoch: [19][ 600/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.004)\tLoss 5.9522e-01 (7.2904e-01)\n",
      "Epoch: [19][ 650/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.004)\tLoss 7.3792e-01 (7.2901e-01)\n",
      "Epoch: [19][ 700/5005]\tTime  0.561 ( 0.564)\tData  0.000 ( 0.004)\tLoss 6.8960e-01 (7.2879e-01)\n",
      "Epoch: [19][ 750/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.003)\tLoss 8.5322e-01 (7.2916e-01)\n",
      "Epoch: [19][ 800/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.003)\tLoss 7.7299e-01 (7.2982e-01)\n",
      "Epoch: [19][ 850/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.003)\tLoss 7.4196e-01 (7.2967e-01)\n",
      "Epoch: [19][ 900/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.003)\tLoss 6.9144e-01 (7.2868e-01)\n",
      "Epoch: [19][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 6.8468e-01 (7.2886e-01)\n",
      "Epoch: [19][1000/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 6.2245e-01 (7.2859e-01)\n",
      "Epoch: [19][1050/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.5882e-01 (7.2815e-01)\n",
      "Epoch: [19][1100/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.002)\tLoss 7.6108e-01 (7.2853e-01)\n",
      "Epoch: [19][1150/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.002)\tLoss 6.4333e-01 (7.2801e-01)\n",
      "Epoch: [19][1200/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.002)\tLoss 7.0190e-01 (7.2740e-01)\n",
      "Epoch: [19][1250/5005]\tTime  0.561 ( 0.562)\tData  0.000 ( 0.002)\tLoss 8.6352e-01 (7.2766e-01)\n",
      "Epoch: [19][1300/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.002)\tLoss 7.6439e-01 (7.2790e-01)\n",
      "Epoch: [19][1350/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.3897e-01 (7.2827e-01)\n",
      "Epoch: [19][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.4092e-01 (7.2807e-01)\n",
      "Epoch: [19][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.5091e-01 (7.2773e-01)\n",
      "Epoch: [19][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.6927e-01 (7.2720e-01)\n",
      "Epoch: [19][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.7081e-01 (7.2746e-01)\n",
      "Epoch: [19][1600/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 6.7758e-01 (7.2748e-01)\n",
      "Epoch: [19][1650/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.3662e-01 (7.2782e-01)\n",
      "Epoch: [19][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.5026e-01 (7.2805e-01)\n",
      "Epoch: [19][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.6321e-01 (7.2746e-01)\n",
      "Epoch: [19][1800/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.4138e-01 (7.2777e-01)\n",
      "Epoch: [19][1850/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.002)\tLoss 5.7100e-01 (7.2734e-01)\n",
      "Epoch: [19][1900/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.001)\tLoss 9.0046e-01 (7.2751e-01)\n",
      "Epoch: [19][1950/5005]\tTime  0.561 ( 0.561)\tData  0.000 ( 0.001)\tLoss 8.5542e-01 (7.2658e-01)\n",
      "Epoch: [19][2000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 6.7257e-01 (7.2701e-01)\n",
      "Epoch: [19][2050/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.001)\tLoss 6.7733e-01 (7.2761e-01)\n",
      "Epoch: [19][2100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 7.4970e-01 (7.2702e-01)\n",
      "Epoch: [19][2150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 7.9246e-01 (7.2703e-01)\n",
      "Epoch: [19][2200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.5871e-01 (7.2727e-01)\n",
      "Epoch: [19][2250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9359e-01 (7.2703e-01)\n",
      "Epoch: [19][2300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3277e-01 (7.2704e-01)\n",
      "Epoch: [19][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4749e-01 (7.2693e-01)\n",
      "Epoch: [19][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6566e-01 (7.2697e-01)\n",
      "Epoch: [19][2450/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7922e-01 (7.2684e-01)\n",
      "Epoch: [19][2500/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2505e-01 (7.2710e-01)\n",
      "Epoch: [19][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1110e-01 (7.2685e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [19][2600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8668e-01 (7.2699e-01)\n",
      "Epoch: [19][2650/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.3903e-01 (7.2719e-01)\n",
      "Epoch: [19][2700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9846e-01 (7.2707e-01)\n",
      "Epoch: [19][2750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9705e-01 (7.2705e-01)\n",
      "Epoch: [19][2800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6591e-01 (7.2694e-01)\n",
      "Epoch: [19][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2229e-01 (7.2709e-01)\n",
      "Epoch: [19][2900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6299e-01 (7.2736e-01)\n",
      "Epoch: [19][2950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7997e-01 (7.2766e-01)\n",
      "Epoch: [19][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.3463e-01 (7.2728e-01)\n",
      "Epoch: [19][3050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3583e-01 (7.2742e-01)\n",
      "Epoch: [19][3100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3653e-01 (7.2726e-01)\n",
      "Epoch: [19][3150/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7332e-01 (7.2708e-01)\n",
      "Epoch: [19][3200/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0210e-01 (7.2699e-01)\n",
      "Epoch: [19][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.3719e-01 (7.2697e-01)\n",
      "Epoch: [19][3300/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0411e-01 (7.2721e-01)\n",
      "Epoch: [19][3350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.6284e-01 (7.2718e-01)\n",
      "Epoch: [19][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.6507e-01 (7.2699e-01)\n",
      "Epoch: [19][3450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0829e-01 (7.2682e-01)\n",
      "Epoch: [19][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.0632e-01 (7.2678e-01)\n",
      "Epoch: [19][3550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9773e-01 (7.2680e-01)\n",
      "Epoch: [19][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9318e-01 (7.2646e-01)\n",
      "Epoch: [19][3650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4841e-01 (7.2627e-01)\n",
      "Epoch: [19][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.4017e-01 (7.2638e-01)\n",
      "Epoch: [19][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4746e-01 (7.2645e-01)\n",
      "Epoch: [19][3800/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1549e-01 (7.2641e-01)\n",
      "Epoch: [19][3850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5558e-01 (7.2622e-01)\n",
      "Epoch: [19][3900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9844e-01 (7.2626e-01)\n",
      "Epoch: [19][3950/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1021e-01 (7.2619e-01)\n",
      "Epoch: [19][4000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6092e-01 (7.2615e-01)\n",
      "Epoch: [19][4050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7789e-01 (7.2630e-01)\n",
      "Epoch: [19][4100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4242e-01 (7.2655e-01)\n",
      "Epoch: [19][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9354e-01 (7.2690e-01)\n",
      "Epoch: [19][4200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9704e-01 (7.2693e-01)\n",
      "Epoch: [19][4250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7306e-01 (7.2678e-01)\n",
      "Epoch: [19][4300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0537e-01 (7.2683e-01)\n",
      "Epoch: [19][4350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8643e-01 (7.2683e-01)\n",
      "Epoch: [19][4400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1254e-01 (7.2680e-01)\n",
      "Epoch: [19][4450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6286e-01 (7.2676e-01)\n",
      "Epoch: [19][4500/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.2380e-01 (7.2648e-01)\n",
      "Epoch: [19][4550/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5521e-01 (7.2659e-01)\n",
      "Epoch: [19][4600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9688e-01 (7.2670e-01)\n",
      "Epoch: [19][4650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8843e-01 (7.2688e-01)\n",
      "Epoch: [19][4700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.5645e-01 (7.2707e-01)\n",
      "Epoch: [19][4750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4556e-01 (7.2699e-01)\n",
      "Epoch: [19][4800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1071e-01 (7.2721e-01)\n",
      "Epoch: [19][4850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.1612e-01 (7.2723e-01)\n",
      "Epoch: [19][4900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.4073e-01 (7.2708e-01)\n",
      "Epoch: [19][4950/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2967e-01 (7.2710e-01)\n",
      "Epoch: [19][5000/5005]\tTime  0.557 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.0980e-01 (7.2703e-01)\n",
      "Test: [  0/196]\tTime  3.295 ( 3.295)\tLoss 5.3719e-01 (5.3719e-01)\tAcc@1  85.55 ( 85.55)\tAcc@5  96.48 ( 96.48)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 4.4355e-01 (6.9311e-01)\tAcc@1  88.67 ( 81.53)\tAcc@5  96.88 ( 95.80)\n",
      "Test: [100/196]\tTime  0.377 ( 0.406)\tLoss 1.2819e+00 (8.2068e-01)\tAcc@1  65.23 ( 78.45)\tAcc@5  91.02 ( 94.53)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 1.0567e+00 (9.3304e-01)\tAcc@1  77.73 ( 76.23)\tAcc@5  90.62 ( 93.12)\n",
      "epoch 19 0.727048396857132 75.4280014038086 0.0 4698510 0.20040132214688156\n",
      "acc 75.4280014038086\n"
     ]
    }
   ],
   "source": [
    "def get_res(epochs=20):\n",
    "    \"\"\"Prune a pretrained ResNet-50 and fine-tune it, printing accuracy and sparsity.\n",
    "\n",
    "    Relies on notebook-level names: ``sparsity``, ``train_loader``,\n",
    "    ``val_loader``, ``train`` and ``validate``.\n",
    "\n",
    "    Args:\n",
    "        epochs: Number of fine-tuning epochs; also used as ``total_iters`` of\n",
    "            the linear (power=1) learning-rate schedule.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(acc1, state_dict)``: the most recent value returned by\n",
    "        ``validate`` (converted with ``.item()``) and a deep copy of the\n",
    "        model's state dict. ``prune.remove`` is never called here, so the\n",
    "        state dict presumably still carries the pruning re-parametrisation --\n",
    "        verify before loading it into an unpruned model.\n",
    "    \"\"\"\n",
    "    from torchvision.models import resnet50\n",
    "    model = resnet50(pretrained=True)\n",
    "    print(model, file=sys.stderr)\n",
    "    model.cuda()\n",
    "\n",
    "    optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum=0.9, nesterov=True, weight_decay=1e-4)\n",
    "    # Zero learning rate: used for one pass over the training data before\n",
    "    # fine-tuning. Judging by the \"start acc bn\" label below, that pass is\n",
    "    # presumably meant to refresh BatchNorm statistics -- confirm in train().\n",
    "    opt0 = torch.optim.SGD(model.parameters(), 0.0, momentum=0.9, nesterov=True, weight_decay=1e-4)\n",
    "    scheduler = torch.optim.lr_scheduler.PolynomialLR(optimizer, total_iters=epochs, power=1)\n",
    "    criterion = nn.CrossEntropyLoss()\n",
    "    criterion_val = nn.CrossEntropyLoss()\n",
    "    scaler = torch.cuda.amp.GradScaler()\n",
    "\n",
    "    def is_prunable(name, module):\n",
    "        # The parentheses matter: `and` binds tighter than `or`, so without\n",
    "        # them every module whose name contains \"conv\" passes the filter,\n",
    "        # including the 3-channel stem convolution that is never pruned.\n",
    "        return (\n",
    "            (\"conv\" in name or \"downsample\" in name)\n",
    "            and \"Conv\" in str(type(module))\n",
    "            and module.weight.shape[1] > 3\n",
    "        )\n",
    "\n",
    "    # Select the layers once so that pruning and every later non-zero count\n",
    "    # operate on exactly the same set of modules.\n",
    "    pruned_layers = [(n, m) for n, m in model.named_modules() if is_prunable(n, m)]\n",
    "\n",
    "    def count_active():\n",
    "        # Number of non-zero weights across the pruned layers.\n",
    "        return sum((m.weight != 0).sum().item() for _, m in pruned_layers)\n",
    "\n",
    "    total_params = 0\n",
    "    for n, m in pruned_layers:\n",
    "        print(n, m.weight.shape)\n",
    "        total_params += m.weight.numel()\n",
    "        prune.l1_unstructured(m, name='weight', amount=sparsity)\n",
    "    print(\"tot\", total_params)\n",
    "\n",
    "    acc1 = validate(val_loader, model, criterion_val).item()\n",
    "    print(\"start acc no bn\", acc1)\n",
    "    train(train_loader, model, criterion, opt0, scaler, -1)\n",
    "    acc1 = validate(val_loader, model, criterion_val).item()\n",
    "    total_active = count_active()\n",
    "    print(\"start acc bn\", acc1, total_active, total_active / total_params)\n",
    "\n",
    "    for epoch in range(epochs):\n",
    "        train_loss = train(train_loader, model, criterion, optimizer, scaler, epoch)\n",
    "        acc1 = validate(val_loader, model, criterion_val).item()\n",
    "        scheduler.step()\n",
    "\n",
    "        total_active = count_active()\n",
    "        # The scheduler has already stepped, so the printed lr is the one that\n",
    "        # the next epoch will use.\n",
    "        print(\"epoch\", epoch, train_loss, acc1, optimizer.param_groups[0]['lr'], total_active, total_active / total_params)\n",
    "\n",
    "    return acc1, copy.deepcopy(model.state_dict())\n",
    "\n",
    "# Run the experiment with the default number of epochs. `acc` receives the first\n",
    "# value returned by get_res and `end` the second (a deep copy of the state dict).\n",
    "acc, end = get_res()\n",
    "\n",
    "print(\"acc\", acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a063e74c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
