{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c1b4f720",
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "# Run-level parameters. This cell carries the \"parameters\" tag (the convention papermill\n",
    "# uses for injecting overrides), so these values act as defaults.\n",
    "seed = 1  # passed to random_seed() further down\n",
    "sparsity = 0.9  # not used in the cells visible here; presumably the target pruning sparsity -- TODO confirm\n",
    "width = 32  # not used in the cells visible here; meaning unclear from this chunk -- TODO confirm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6edfc04b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# Expose only GPU index 0 to this process. The assignment sits in a cell that runs\n",
    "# before the cell importing torch.\n",
    "# NOTE(review): the variable has to be set before CUDA is initialised to take effect -- keep this cell first.\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fda18cc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import copy\n",
    "import os\n",
    "import random\n",
    "import shutil\n",
    "import sys\n",
    "import time\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import scipy.stats as ss\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import torch.nn.init as init\n",
    "import torch.nn.utils.prune as prune\n",
    "import torchvision.datasets as datasets\n",
    "import torchvision.transforms as transforms\n",
    "from sklearn.decomposition import PCA\n",
    "from timm.data import Mixup\n",
    "from timm.loss import SoftTargetCrossEntropy\n",
    "from torch.autograd import Variable\n",
    "\n",
    "# Kept last so any names it exports shadow the imports above exactly as before.\n",
    "from datautils import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c9b3aabc",
   "metadata": {},
   "outputs": [],
   "source": [
    "def random_seed(seed=42, rank=0):\n",
    "    \"\"\"Seed the torch, NumPy and stdlib ``random`` generators with ``seed + rank``.\"\"\"\n",
    "    effective_seed = seed + rank\n",
    "    torch.manual_seed(effective_seed)\n",
    "    np.random.seed(effective_seed)\n",
    "    random.seed(effective_seed)\n",
    "\n",
    "# Seed all three RNGs from the notebook-level `seed` parameter before the data loaders are built.\n",
    "random_seed(seed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2348c12a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the training and validation loaders.\n",
    "# get_loaders is not defined in this notebook; presumably it comes from `from datautils import *` -- verify.\n",
    "train_loader, val_loader = get_loaders(\n",
    "    \"imagenet\", path=\"\",  # NOTE(review): dataset path is an empty string -- confirm this is intended\n",
    "    batchsize=256, workers=8,\n",
    "    nsamples=-1, seed=0,  # NOTE(review): loader seed is fixed at 0, not the notebook's `seed` parameter -- confirm\n",
    "    noaug=False\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c50e599a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def train(train_loader, model, criterion, optimizer, scaler, epoch):\n",
    "    \"\"\"Run one mixed-precision training pass over ``train_loader``.\n",
    "\n",
    "    Every batch is moved to the GPU, evaluated under ``torch.cuda.amp.autocast``\n",
    "    and optimised through ``scaler`` (scale -> backward -> step -> update).\n",
    "    Timing and loss meters are printed every 50 batches. When ``epoch`` is\n",
    "    ``-1`` the pass stops right after the batch with index 50.\n",
    "\n",
    "    Returns:\n",
    "        The running average held by the loss meter, weighted by batch size.\n",
    "    \"\"\"\n",
    "    step_timer = AverageMeter('Time', ':6.3f')\n",
    "    load_timer = AverageMeter('Data', ':6.3f')\n",
    "    loss_meter = AverageMeter('Loss', ':.4e')\n",
    "    reporter = ProgressMeter(\n",
    "        len(train_loader),\n",
    "        [step_timer, load_timer, loss_meter],\n",
    "        prefix=f\"Epoch: [{epoch}]\")\n",
    "\n",
    "    model.train()\n",
    "\n",
    "    tick = time.time()\n",
    "    for step, (images, target) in enumerate(train_loader):\n",
    "        # time spent waiting for the loader to hand over this batch\n",
    "        load_timer.update(time.time() - tick)\n",
    "        images = images.cuda(non_blocking=True)\n",
    "        target = target.cuda(non_blocking=True)\n",
    "\n",
    "        # forward pass and loss under autocast\n",
    "        with torch.cuda.amp.autocast():\n",
    "            output = model(images)\n",
    "            loss = criterion(output, target)\n",
    "\n",
    "        loss_meter.update(loss.item(), images.size(0))\n",
    "\n",
    "        # backward pass and parameter update, both routed through the gradient scaler\n",
    "        optimizer.zero_grad()\n",
    "        scaler.scale(loss).backward()\n",
    "        scaler.step(optimizer)\n",
    "        scaler.update()\n",
    "\n",
    "        # wall-clock time for the whole step, data loading included\n",
    "        step_timer.update(time.time() - tick)\n",
    "        tick = time.time()\n",
    "\n",
    "        if step % 50 == 0:\n",
    "            reporter.display(step)\n",
    "        if epoch == -1 and step == 50:\n",
    "            break\n",
    "\n",
    "    return loss_meter.avg\n",
    "\n",
    "\n",
    "def validate(val_loader, model, criterion):\n",
    "    \"\"\"Evaluate ``model`` on ``val_loader`` and return the averaged top-1 accuracy.\n",
    "\n",
    "    The model is put in eval mode and the loop runs under ``torch.no_grad()``.\n",
    "    Time, loss, top-1 and top-5 meters are printed every 50 batches.\n",
    "    \"\"\"\n",
    "    step_timer = AverageMeter('Time', ':6.3f')\n",
    "    loss_meter = AverageMeter('Loss', ':.4e')\n",
    "    acc1_meter = AverageMeter('Acc@1', ':6.2f')\n",
    "    acc5_meter = AverageMeter('Acc@5', ':6.2f')\n",
    "    reporter = ProgressMeter(\n",
    "        len(val_loader),\n",
    "        [step_timer, loss_meter, acc1_meter, acc5_meter],\n",
    "        prefix='Test: ')\n",
    "\n",
    "    model.eval()\n",
    "\n",
    "    with torch.no_grad():\n",
    "        tick = time.time()\n",
    "        for step, (images, target) in enumerate(val_loader):\n",
    "            images = images.cuda(non_blocking=True)\n",
    "            target = target.cuda(non_blocking=True)\n",
    "\n",
    "            output = model(images)\n",
    "            loss = criterion(output, target)\n",
    "\n",
    "            # every meter is weighted by the number of samples in the batch\n",
    "            acc1, acc5 = accuracy(output, target, topk=(1, 5))\n",
    "            n_samples = images.size(0)\n",
    "            loss_meter.update(loss.item(), n_samples)\n",
    "            acc1_meter.update(acc1[0], n_samples)\n",
    "            acc5_meter.update(acc5[0], n_samples)\n",
    "\n",
    "            step_timer.update(time.time() - tick)\n",
    "            tick = time.time()\n",
    "\n",
    "            if step % 50 == 0:\n",
    "                reporter.display(step)\n",
    "\n",
    "        # TODO: print the final Acc@1 / Acc@5 summary through the ProgressMeter as well\n",
    "\n",
    "    return acc1_meter.avg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "698abd39",
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_checkpoint(state, is_best, filename='checkpoint.pth.tar',\n",
    "                    best_filename='model_best.pth.tar'):\n",
    "    \"\"\"Write ``state`` to ``filename`` and optionally keep a copy as the best model.\n",
    "\n",
    "    Args:\n",
    "        state: object handed to ``torch.save``.\n",
    "        is_best: when truthy, the file just written is also copied to\n",
    "            ``best_filename``.\n",
    "        filename: destination of the checkpoint.\n",
    "        best_filename: destination of the copy made when ``is_best`` is set;\n",
    "            the default matches the previously hard-coded name.\n",
    "    \"\"\"\n",
    "    torch.save(state, filename)\n",
    "    if is_best:\n",
    "        shutil.copyfile(filename, best_filename)\n",
    "\n",
    "\n",
    "class AverageMeter(object):\n",
    "    \"\"\"Tracks the latest value and the weighted running mean of a metric.\n",
    "\n",
    "    Attributes:\n",
    "        name: label used by ``__str__``.\n",
    "        fmt: format-spec suffix (e.g. ``':6.3f'``) applied to both printed numbers.\n",
    "        val: most recent value passed to ``update``.\n",
    "        sum: weighted sum of all values since the last ``reset``.\n",
    "        count: total weight since the last ``reset``.\n",
    "        avg: ``sum / count``, or ``0`` while ``count`` is zero.\n",
    "    \"\"\"\n",
    "    def __init__(self, name, fmt=':f'):\n",
    "        self.name = name\n",
    "        self.fmt = fmt\n",
    "        self.reset()\n",
    "\n",
    "    def reset(self):\n",
    "        \"\"\"Zero every running statistic.\"\"\"\n",
    "        self.val = 0\n",
    "        self.avg = 0\n",
    "        self.sum = 0\n",
    "        self.count = 0\n",
    "\n",
    "    def update(self, val, n=1):\n",
    "        \"\"\"Record ``val`` as the latest value, weighted by ``n``.\"\"\"\n",
    "        self.val = val\n",
    "        self.sum += val * n\n",
    "        self.count += n\n",
    "        # A zero-weight update on a fresh meter used to raise ZeroDivisionError;\n",
    "        # keep the mean at 0 until some weight has been accumulated.\n",
    "        self.avg = self.sum / self.count if self.count else 0\n",
    "\n",
    "    def __str__(self):\n",
    "        \"\"\"Render as ``'<name> <val> (<avg>)'``, formatting both numbers with ``fmt``.\"\"\"\n",
    "        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'\n",
    "        return fmtstr.format(**self.__dict__)\n",
    "\n",
    "\n",
    "class ProgressMeter(object):\n",
    "    \"\"\"Prints one tab-separated progress line: prefix, batch counter, then each meter.\"\"\"\n",
    "    def __init__(self, num_batches, meters, prefix=\"\"):\n",
    "        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)\n",
    "        self.meters = meters\n",
    "        self.prefix = prefix\n",
    "\n",
    "    def display(self, batch):\n",
    "        \"\"\"Print ``prefix`` + ``[batch/total]`` followed by ``str()`` of every meter.\"\"\"\n",
    "        header = self.prefix + self.batch_fmtstr.format(batch)\n",
    "        print('\\t'.join([header, *map(str, self.meters)]))\n",
    "\n",
    "    def _get_batch_fmtstr(self, num_batches):\n",
    "        \"\"\"Build ``'[{:Nd}/total]'`` where N is the number of digits in ``num_batches``.\"\"\"\n",
    "        width = len(str(num_batches // 1))\n",
    "        counter = '{:%dd}' % width\n",
    "        return '[%s/%s]' % (counter, counter.format(num_batches))\n",
    "\n",
    "\n",
    "def adjust_learning_rate(optimizer, epoch):\n",
    "    \"\"\"Step schedule: scale the global base rate ``LR`` by 0.1 for every 30 completed epochs.\n",
    "\n",
    "    The resulting rate is written to every entry of ``optimizer.param_groups``.\n",
    "    \"\"\"\n",
    "    decayed_lr = LR * 0.1 ** (epoch // 30)\n",
    "    for group in optimizer.param_groups:\n",
    "        group['lr'] = decayed_lr\n",
    "\n",
    "\n",
    "def accuracy(output, target, topk=(1,)):\n",
    "    \"\"\"Return top-k accuracy (in percent) of ``output`` against ``target`` for each k in ``topk``.\n",
    "\n",
    "    The result is a list with one single-element tensor per requested k.\n",
    "    \"\"\"\n",
    "    with torch.no_grad():\n",
    "        largest_k = max(topk)\n",
    "        n_samples = target.size(0)\n",
    "\n",
    "        # indices of the largest_k highest scores per sample, transposed to (k, batch)\n",
    "        _, top_idx = output.topk(largest_k, dim=1, largest=True, sorted=True)\n",
    "        top_idx = top_idx.t()\n",
    "        hits = top_idx.eq(target.view(1, -1).expand_as(top_idx))\n",
    "\n",
    "        return [\n",
    "            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(100.0 / n_samples)\n",
    "            for k in topk\n",
    "        ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "dba446b7",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "ResNet(\n",
      "  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
      "  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "  (relu): ReLU(inplace=True)\n",
      "  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
      "  (layer1): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "  )\n",
      "  (layer2): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (3): Bottleneck(\n",
      "      (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "  )\n",
      "  (layer3): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "        (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (3): Bottleneck(\n",
      "      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (4): Bottleneck(\n",
      "      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (5): Bottleneck(\n",
      "      (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "  )\n",
      "  (layer4): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "        (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "    )\n",
      "  )\n",
      "  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))\n",
      "  (fc): Linear(in_features=2048, out_features=1000, bias=True)\n",
      ")\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "layer1.0.conv1 torch.Size([64, 64, 1, 1])\n",
      "layer1.0.conv2 torch.Size([64, 64, 3, 3])\n",
      "layer1.0.conv3 torch.Size([256, 64, 1, 1])\n",
      "layer1.0.downsample.0 torch.Size([256, 64, 1, 1])\n",
      "layer1.1.conv1 torch.Size([64, 256, 1, 1])\n",
      "layer1.1.conv2 torch.Size([64, 64, 3, 3])\n",
      "layer1.1.conv3 torch.Size([256, 64, 1, 1])\n",
      "layer1.2.conv1 torch.Size([64, 256, 1, 1])\n",
      "layer1.2.conv2 torch.Size([64, 64, 3, 3])\n",
      "layer1.2.conv3 torch.Size([256, 64, 1, 1])\n",
      "layer2.0.conv1 torch.Size([128, 256, 1, 1])\n",
      "layer2.0.conv2 torch.Size([128, 128, 3, 3])\n",
      "layer2.0.conv3 torch.Size([512, 128, 1, 1])\n",
      "layer2.0.downsample.0 torch.Size([512, 256, 1, 1])\n",
      "layer2.1.conv1 torch.Size([128, 512, 1, 1])\n",
      "layer2.1.conv2 torch.Size([128, 128, 3, 3])\n",
      "layer2.1.conv3 torch.Size([512, 128, 1, 1])\n",
      "layer2.2.conv1 torch.Size([128, 512, 1, 1])\n",
      "layer2.2.conv2 torch.Size([128, 128, 3, 3])\n",
      "layer2.2.conv3 torch.Size([512, 128, 1, 1])\n",
      "layer2.3.conv1 torch.Size([128, 512, 1, 1])\n",
      "layer2.3.conv2 torch.Size([128, 128, 3, 3])\n",
      "layer2.3.conv3 torch.Size([512, 128, 1, 1])\n",
      "layer3.0.conv1 torch.Size([256, 512, 1, 1])\n",
      "layer3.0.conv2 torch.Size([256, 256, 3, 3])\n",
      "layer3.0.conv3 torch.Size([1024, 256, 1, 1])\n",
      "layer3.0.downsample.0 torch.Size([1024, 512, 1, 1])\n",
      "layer3.1.conv1 torch.Size([256, 1024, 1, 1])\n",
      "layer3.1.conv2 torch.Size([256, 256, 3, 3])\n",
      "layer3.1.conv3 torch.Size([1024, 256, 1, 1])\n",
      "layer3.2.conv1 torch.Size([256, 1024, 1, 1])\n",
      "layer3.2.conv2 torch.Size([256, 256, 3, 3])\n",
      "layer3.2.conv3 torch.Size([1024, 256, 1, 1])\n",
      "layer3.3.conv1 torch.Size([256, 1024, 1, 1])\n",
      "layer3.3.conv2 torch.Size([256, 256, 3, 3])\n",
      "layer3.3.conv3 torch.Size([1024, 256, 1, 1])\n",
      "layer3.4.conv1 torch.Size([256, 1024, 1, 1])\n",
      "layer3.4.conv2 torch.Size([256, 256, 3, 3])\n",
      "layer3.4.conv3 torch.Size([1024, 256, 1, 1])\n",
      "layer3.5.conv1 torch.Size([256, 1024, 1, 1])\n",
      "layer3.5.conv2 torch.Size([256, 256, 3, 3])\n",
      "layer3.5.conv3 torch.Size([1024, 256, 1, 1])\n",
      "layer4.0.conv1 torch.Size([512, 1024, 1, 1])\n",
      "layer4.0.conv2 torch.Size([512, 512, 3, 3])\n",
      "layer4.0.conv3 torch.Size([2048, 512, 1, 1])\n",
      "layer4.0.downsample.0 torch.Size([2048, 1024, 1, 1])\n",
      "layer4.1.conv1 torch.Size([512, 2048, 1, 1])\n",
      "layer4.1.conv2 torch.Size([512, 512, 3, 3])\n",
      "layer4.1.conv3 torch.Size([2048, 512, 1, 1])\n",
      "layer4.2.conv1 torch.Size([512, 2048, 1, 1])\n",
      "layer4.2.conv2 torch.Size([512, 512, 3, 3])\n",
      "layer4.2.conv3 torch.Size([2048, 512, 1, 1])\n",
      "tot 23445504\n",
      "Test: [  0/196]\tTime  6.582 ( 6.582)\tLoss 5.5749e+01 (5.5749e+01)\tAcc@1   0.00 (  0.00)\tAcc@5   0.00 (  0.00)\n",
      "Test: [ 50/196]\tTime  0.375 ( 0.496)\tLoss 5.9513e+01 (5.1148e+01)\tAcc@1   0.00 (  0.00)\tAcc@5   0.00 (  0.00)\n",
      "Test: [100/196]\tTime  0.376 ( 0.436)\tLoss 3.9376e+01 (5.0243e+01)\tAcc@1   0.00 (  0.00)\tAcc@5   0.00 (  0.20)\n",
      "Test: [150/196]\tTime  0.377 ( 0.416)\tLoss 3.5130e+01 (4.8399e+01)\tAcc@1   0.00 (  0.00)\tAcc@5   0.00 (  0.54)\n",
      "start acc no bn 0.09799999743700027\n",
      "Epoch: [-1][   0/5005]\tTime  3.223 ( 3.223)\tData  2.409 ( 2.409)\tLoss 5.9742e+00 (5.9742e+00)\n",
      "Epoch: [-1][  50/5005]\tTime  0.558 ( 0.610)\tData  0.000 ( 0.047)\tLoss 6.0983e+00 (6.1084e+00)\n",
      "Test: [  0/196]\tTime  3.330 ( 3.330)\tLoss 6.5021e+00 (6.5021e+00)\tAcc@1  12.89 ( 12.89)\tAcc@5  25.78 ( 25.78)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 5.4744e+00 (6.1362e+00)\tAcc@1  22.27 (  9.62)\tAcc@5  47.66 ( 23.22)\n",
      "Test: [100/196]\tTime  0.377 ( 0.406)\tLoss 5.8354e+00 (6.1160e+00)\tAcc@1   9.38 (  9.31)\tAcc@5  24.61 ( 22.18)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 5.3836e+00 (6.0562e+00)\tAcc@1  16.02 (  9.18)\tAcc@5  31.25 ( 22.11)\n",
      "start acc bn 9.871999740600586 2353956 0.10040116859931866\n",
      "Epoch: [0][   0/5005]\tTime  3.123 ( 3.123)\tData  2.553 ( 2.553)\tLoss 6.2004e+00 (6.2004e+00)\n",
      "Epoch: [0][  50/5005]\tTime  0.559 ( 0.609)\tData  0.000 ( 0.050)\tLoss 2.4272e+00 (3.2868e+00)\n",
      "Epoch: [0][ 100/5005]\tTime  0.558 ( 0.584)\tData  0.000 ( 0.025)\tLoss 1.8837e+00 (2.7400e+00)\n",
      "Epoch: [0][ 150/5005]\tTime  0.558 ( 0.575)\tData  0.000 ( 0.017)\tLoss 1.9966e+00 (2.4935e+00)\n",
      "Epoch: [0][ 200/5005]\tTime  0.558 ( 0.571)\tData  0.000 ( 0.013)\tLoss 2.0496e+00 (2.3432e+00)\n",
      "Epoch: [0][ 250/5005]\tTime  0.558 ( 0.569)\tData  0.000 ( 0.010)\tLoss 1.7601e+00 (2.2347e+00)\n",
      "Epoch: [0][ 300/5005]\tTime  0.558 ( 0.567)\tData  0.000 ( 0.009)\tLoss 1.8681e+00 (2.1523e+00)\n",
      "Epoch: [0][ 350/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.4050e+00 (2.0907e+00)\n",
      "Epoch: [0][ 400/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.007)\tLoss 1.6324e+00 (2.0417e+00)\n",
      "Epoch: [0][ 450/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.006)\tLoss 1.7880e+00 (1.9982e+00)\n",
      "Epoch: [0][ 500/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.005)\tLoss 1.5990e+00 (1.9614e+00)\n",
      "Epoch: [0][ 550/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.005)\tLoss 1.4629e+00 (1.9313e+00)\n",
      "Epoch: [0][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.6666e+00 (1.9050e+00)\n",
      "Epoch: [0][ 650/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.5359e+00 (1.8792e+00)\n",
      "Epoch: [0][ 700/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.8374e+00 (1.8587e+00)\n",
      "Epoch: [0][ 750/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.6385e+00 (1.8399e+00)\n",
      "Epoch: [0][ 800/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.7355e+00 (1.8216e+00)\n",
      "Epoch: [0][ 850/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.7751e+00 (1.8063e+00)\n",
      "Epoch: [0][ 900/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.5286e+00 (1.7906e+00)\n",
      "Epoch: [0][ 950/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.4714e+00 (1.7770e+00)\n",
      "Epoch: [0][1000/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.4817e+00 (1.7647e+00)\n",
      "Epoch: [0][1050/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.5203e+00 (1.7527e+00)\n",
      "Epoch: [0][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.5268e+00 (1.7422e+00)\n",
      "Epoch: [0][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.5904e+00 (1.7320e+00)\n",
      "Epoch: [0][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.6677e+00 (1.7228e+00)\n",
      "Epoch: [0][1250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3806e+00 (1.7136e+00)\n",
      "Epoch: [0][1300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3830e+00 (1.7047e+00)\n",
      "Epoch: [0][1350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.4548e+00 (1.6962e+00)\n",
      "Epoch: [0][1400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3505e+00 (1.6892e+00)\n",
      "Epoch: [0][1450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.4954e+00 (1.6823e+00)\n",
      "Epoch: [0][1500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3461e+00 (1.6757e+00)\n",
      "Epoch: [0][1550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.4849e+00 (1.6692e+00)\n",
      "Epoch: [0][1600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.4239e+00 (1.6625e+00)\n",
      "Epoch: [0][1650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3293e+00 (1.6566e+00)\n",
      "Epoch: [0][1700/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3843e+00 (1.6512e+00)\n",
      "Epoch: [0][1750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.1489e+00 (1.6455e+00)\n",
      "Epoch: [0][1800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.6771e+00 (1.6396e+00)\n",
      "Epoch: [0][1850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.7652e+00 (1.6345e+00)\n",
      "Epoch: [0][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.7180e+00 (1.6299e+00)\n",
      "Epoch: [0][1950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4737e+00 (1.6249e+00)\n",
      "Epoch: [0][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4271e+00 (1.6201e+00)\n",
      "Epoch: [0][2050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3160e+00 (1.6159e+00)\n",
      "Epoch: [0][2100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2928e+00 (1.6122e+00)\n",
      "Epoch: [0][2150/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.7911e+00 (1.6087e+00)\n",
      "Epoch: [0][2200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4030e+00 (1.6046e+00)\n",
      "Epoch: [0][2250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4959e+00 (1.6006e+00)\n",
      "Epoch: [0][2300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4402e+00 (1.5967e+00)\n",
      "Epoch: [0][2350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2962e+00 (1.5930e+00)\n",
      "Epoch: [0][2400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.5524e+00 (1.5895e+00)\n",
      "Epoch: [0][2450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4832e+00 (1.5859e+00)\n",
      "Epoch: [0][2500/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3152e+00 (1.5826e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [0][2550/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3117e+00 (1.5793e+00)\n",
      "Epoch: [0][2600/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4314e+00 (1.5765e+00)\n",
      "Epoch: [0][2650/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2277e+00 (1.5737e+00)\n",
      "Epoch: [0][2700/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4547e+00 (1.5711e+00)\n",
      "Epoch: [0][2750/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.5176e+00 (1.5685e+00)\n",
      "Epoch: [0][2800/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3230e+00 (1.5658e+00)\n",
      "Epoch: [0][2850/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2650e+00 (1.5634e+00)\n",
      "Epoch: [0][2900/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3520e+00 (1.5604e+00)\n",
      "Epoch: [0][2950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4344e+00 (1.5578e+00)\n",
      "Epoch: [0][3000/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3306e+00 (1.5552e+00)\n",
      "Epoch: [0][3050/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2218e+00 (1.5525e+00)\n",
      "Epoch: [0][3100/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2852e+00 (1.5495e+00)\n",
      "Epoch: [0][3150/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4892e+00 (1.5477e+00)\n",
      "Epoch: [0][3200/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3202e+00 (1.5456e+00)\n",
      "Epoch: [0][3250/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2815e+00 (1.5432e+00)\n",
      "Epoch: [0][3300/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3685e+00 (1.5409e+00)\n",
      "Epoch: [0][3350/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4466e+00 (1.5388e+00)\n",
      "Epoch: [0][3400/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2133e+00 (1.5368e+00)\n",
      "Epoch: [0][3450/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3761e+00 (1.5346e+00)\n",
      "Epoch: [0][3500/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.5207e+00 (1.5329e+00)\n",
      "Epoch: [0][3550/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4235e+00 (1.5313e+00)\n",
      "Epoch: [0][3600/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4824e+00 (1.5293e+00)\n",
      "Epoch: [0][3650/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3702e+00 (1.5277e+00)\n",
      "Epoch: [0][3700/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2761e+00 (1.5259e+00)\n",
      "Epoch: [0][3750/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4231e+00 (1.5240e+00)\n",
      "Epoch: [0][3800/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2309e+00 (1.5224e+00)\n",
      "Epoch: [0][3850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3541e+00 (1.5206e+00)\n",
      "Epoch: [0][3900/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2703e+00 (1.5191e+00)\n",
      "Epoch: [0][3950/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2201e+00 (1.5174e+00)\n",
      "Epoch: [0][4000/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4373e+00 (1.5157e+00)\n",
      "Epoch: [0][4050/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2962e+00 (1.5142e+00)\n",
      "Epoch: [0][4100/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3386e+00 (1.5126e+00)\n",
      "Epoch: [0][4150/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.5029e+00 (1.5113e+00)\n",
      "Epoch: [0][4200/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1993e+00 (1.5098e+00)\n",
      "Epoch: [0][4250/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2405e+00 (1.5087e+00)\n",
      "Epoch: [0][4300/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.5332e+00 (1.5075e+00)\n",
      "Epoch: [0][4350/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2970e+00 (1.5059e+00)\n",
      "Epoch: [0][4400/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3164e+00 (1.5041e+00)\n",
      "Epoch: [0][4450/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4325e+00 (1.5027e+00)\n",
      "Epoch: [0][4500/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2870e+00 (1.5014e+00)\n",
      "Epoch: [0][4550/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.5138e+00 (1.5001e+00)\n",
      "Epoch: [0][4600/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.5257e+00 (1.4987e+00)\n",
      "Epoch: [0][4650/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.5283e+00 (1.4973e+00)\n",
      "Epoch: [0][4700/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3351e+00 (1.4958e+00)\n",
      "Epoch: [0][4750/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1942e+00 (1.4947e+00)\n",
      "Epoch: [0][4800/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3197e+00 (1.4933e+00)\n",
      "Epoch: [0][4850/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2895e+00 (1.4920e+00)\n",
      "Epoch: [0][4900/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2674e+00 (1.4906e+00)\n",
      "Epoch: [0][4950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2601e+00 (1.4893e+00)\n",
      "Epoch: [0][5000/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4106e+00 (1.4883e+00)\n",
      "Test: [  0/196]\tTime  3.301 ( 3.301)\tLoss 7.5732e-01 (7.5732e-01)\tAcc@1  79.30 ( 79.30)\tAcc@5  96.09 ( 96.09)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.434)\tLoss 6.2777e-01 (1.0133e+00)\tAcc@1  83.20 ( 73.02)\tAcc@5  95.70 ( 92.14)\n",
      "Test: [100/196]\tTime  0.377 ( 0.406)\tLoss 1.8205e+00 (1.1721e+00)\tAcc@1  53.12 ( 69.83)\tAcc@5  80.47 ( 90.27)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.4013e+00 (1.3123e+00)\tAcc@1  69.92 ( 67.27)\tAcc@5  85.94 ( 88.26)\n",
      "epoch 0 1.4882097387593878 66.23600006103516 0.0095 2353956 0.10040116859931866\n",
      "Epoch: [1][   0/5005]\tTime  3.284 ( 3.284)\tData  2.725 ( 2.725)\tLoss 1.3986e+00 (1.3986e+00)\n",
      "Epoch: [1][  50/5005]\tTime  0.558 ( 0.612)\tData  0.000 ( 0.054)\tLoss 1.2806e+00 (1.3317e+00)\n",
      "Epoch: [1][ 100/5005]\tTime  0.558 ( 0.586)\tData  0.000 ( 0.027)\tLoss 1.2496e+00 (1.3408e+00)\n",
      "Epoch: [1][ 150/5005]\tTime  0.558 ( 0.577)\tData  0.000 ( 0.018)\tLoss 1.3316e+00 (1.3305e+00)\n",
      "Epoch: [1][ 200/5005]\tTime  0.559 ( 0.572)\tData  0.000 ( 0.014)\tLoss 1.5857e+00 (1.3251e+00)\n",
      "Epoch: [1][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.011)\tLoss 1.2266e+00 (1.3246e+00)\n",
      "Epoch: [1][ 300/5005]\tTime  0.558 ( 0.568)\tData  0.000 ( 0.009)\tLoss 1.5085e+00 (1.3254e+00)\n",
      "Epoch: [1][ 350/5005]\tTime  0.558 ( 0.566)\tData  0.000 ( 0.008)\tLoss 1.3142e+00 (1.3230e+00)\n",
      "Epoch: [1][ 400/5005]\tTime  0.558 ( 0.565)\tData  0.000 ( 0.007)\tLoss 1.2463e+00 (1.3230e+00)\n",
      "Epoch: [1][ 450/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.006)\tLoss 1.4760e+00 (1.3215e+00)\n",
      "Epoch: [1][ 500/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.006)\tLoss 1.2334e+00 (1.3195e+00)\n",
      "Epoch: [1][ 550/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.005)\tLoss 1.1178e+00 (1.3209e+00)\n",
      "Epoch: [1][ 600/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.005)\tLoss 1.3069e+00 (1.3210e+00)\n",
      "Epoch: [1][ 650/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.2602e+00 (1.3187e+00)\n",
      "Epoch: [1][ 700/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.1065e+00 (1.3174e+00)\n",
      "Epoch: [1][ 750/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.4385e+00 (1.3173e+00)\n",
      "Epoch: [1][ 800/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.004)\tLoss 1.2204e+00 (1.3172e+00)\n",
      "Epoch: [1][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1864e+00 (1.3168e+00)\n",
      "Epoch: [1][ 900/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.4710e+00 (1.3166e+00)\n",
      "Epoch: [1][ 950/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.1772e+00 (1.3162e+00)\n",
      "Epoch: [1][1000/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.1971e+00 (1.3165e+00)\n",
      "Epoch: [1][1050/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.3324e+00 (1.3163e+00)\n",
      "Epoch: [1][1100/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.3217e+00 (1.3146e+00)\n",
      "Epoch: [1][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.1593e+00 (1.3155e+00)\n",
      "Epoch: [1][1200/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2485e+00 (1.3158e+00)\n",
      "Epoch: [1][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1552e+00 (1.3153e+00)\n",
      "Epoch: [1][1300/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3230e+00 (1.3154e+00)\n",
      "Epoch: [1][1350/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.2844e+00 (1.3156e+00)\n",
      "Epoch: [1][1400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.4517e+00 (1.3160e+00)\n",
      "Epoch: [1][1450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.6877e+00 (1.3159e+00)\n",
      "Epoch: [1][1500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.6468e+00 (1.3157e+00)\n",
      "Epoch: [1][1550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3551e+00 (1.3162e+00)\n",
      "Epoch: [1][1600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.2706e+00 (1.3172e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [1][1650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3333e+00 (1.3178e+00)\n",
      "Epoch: [1][1700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3678e+00 (1.3183e+00)\n",
      "Epoch: [1][1750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.4587e+00 (1.3182e+00)\n",
      "Epoch: [1][1800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.4753e+00 (1.3185e+00)\n",
      "Epoch: [1][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.4913e+00 (1.3181e+00)\n",
      "Epoch: [1][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.2681e+00 (1.3180e+00)\n",
      "Epoch: [1][1950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.5629e+00 (1.3177e+00)\n",
      "Epoch: [1][2000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.4205e+00 (1.3184e+00)\n",
      "Epoch: [1][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0460e+00 (1.3180e+00)\n",
      "Epoch: [1][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2332e+00 (1.3174e+00)\n",
      "Epoch: [1][2150/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3895e+00 (1.3176e+00)\n",
      "Epoch: [1][2200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3282e+00 (1.3177e+00)\n",
      "Epoch: [1][2250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2748e+00 (1.3178e+00)\n",
      "Epoch: [1][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3688e+00 (1.3174e+00)\n",
      "Epoch: [1][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4469e+00 (1.3177e+00)\n",
      "Epoch: [1][2400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4158e+00 (1.3177e+00)\n",
      "Epoch: [1][2450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3522e+00 (1.3180e+00)\n",
      "Epoch: [1][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2995e+00 (1.3178e+00)\n",
      "Epoch: [1][2550/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3864e+00 (1.3183e+00)\n",
      "Epoch: [1][2600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2064e+00 (1.3184e+00)\n",
      "Epoch: [1][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3414e+00 (1.3185e+00)\n",
      "Epoch: [1][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3141e+00 (1.3189e+00)\n",
      "Epoch: [1][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0906e+00 (1.3186e+00)\n",
      "Epoch: [1][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4713e+00 (1.3184e+00)\n",
      "Epoch: [1][2850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4986e+00 (1.3182e+00)\n",
      "Epoch: [1][2900/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3358e+00 (1.3182e+00)\n",
      "Epoch: [1][2950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1890e+00 (1.3181e+00)\n",
      "Epoch: [1][3000/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2281e+00 (1.3180e+00)\n",
      "Epoch: [1][3050/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1850e+00 (1.3180e+00)\n",
      "Epoch: [1][3100/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3372e+00 (1.3180e+00)\n",
      "Epoch: [1][3150/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2743e+00 (1.3177e+00)\n",
      "Epoch: [1][3200/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4042e+00 (1.3175e+00)\n",
      "Epoch: [1][3250/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2654e+00 (1.3178e+00)\n",
      "Epoch: [1][3300/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3052e+00 (1.3176e+00)\n",
      "Epoch: [1][3350/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2566e+00 (1.3175e+00)\n",
      "Epoch: [1][3400/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3563e+00 (1.3172e+00)\n",
      "Epoch: [1][3450/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3499e+00 (1.3171e+00)\n",
      "Epoch: [1][3500/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.5391e+00 (1.3169e+00)\n",
      "Epoch: [1][3550/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2300e+00 (1.3169e+00)\n",
      "Epoch: [1][3600/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4445e+00 (1.3169e+00)\n",
      "Epoch: [1][3650/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2523e+00 (1.3173e+00)\n",
      "Epoch: [1][3700/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.5825e+00 (1.3175e+00)\n",
      "Epoch: [1][3750/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3103e+00 (1.3178e+00)\n",
      "Epoch: [1][3800/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3716e+00 (1.3180e+00)\n",
      "Epoch: [1][3850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3924e+00 (1.3180e+00)\n",
      "Epoch: [1][3900/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3073e+00 (1.3182e+00)\n",
      "Epoch: [1][3950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2070e+00 (1.3180e+00)\n",
      "Epoch: [1][4000/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2822e+00 (1.3177e+00)\n",
      "Epoch: [1][4050/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2736e+00 (1.3177e+00)\n",
      "Epoch: [1][4100/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2196e+00 (1.3177e+00)\n",
      "Epoch: [1][4150/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3868e+00 (1.3178e+00)\n",
      "Epoch: [1][4200/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3749e+00 (1.3177e+00)\n",
      "Epoch: [1][4250/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3135e+00 (1.3175e+00)\n",
      "Epoch: [1][4300/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3512e+00 (1.3172e+00)\n",
      "Epoch: [1][4350/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2178e+00 (1.3171e+00)\n",
      "Epoch: [1][4400/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2593e+00 (1.3171e+00)\n",
      "Epoch: [1][4450/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4868e+00 (1.3171e+00)\n",
      "Epoch: [1][4500/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2068e+00 (1.3170e+00)\n",
      "Epoch: [1][4550/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.0993e+00 (1.3172e+00)\n",
      "Epoch: [1][4600/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.4018e+00 (1.3170e+00)\n",
      "Epoch: [1][4650/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1194e+00 (1.3168e+00)\n",
      "Epoch: [1][4700/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.5198e+00 (1.3171e+00)\n",
      "Epoch: [1][4750/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.5030e+00 (1.3172e+00)\n",
      "Epoch: [1][4800/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3570e+00 (1.3171e+00)\n",
      "Epoch: [1][4850/5005]\tTime  0.558 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3192e+00 (1.3171e+00)\n",
      "Epoch: [1][4900/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.2718e+00 (1.3168e+00)\n",
      "Epoch: [1][4950/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.1904e+00 (1.3167e+00)\n",
      "Epoch: [1][5000/5005]\tTime  0.559 ( 0.559)\tData  0.000 ( 0.001)\tLoss 1.3713e+00 (1.3165e+00)\n",
      "Test: [  0/196]\tTime  3.423 ( 3.423)\tLoss 7.7394e-01 (7.7394e-01)\tAcc@1  76.17 ( 76.17)\tAcc@5  94.92 ( 94.92)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.437)\tLoss 6.2304e-01 (1.0339e+00)\tAcc@1  83.59 ( 72.85)\tAcc@5  96.88 ( 92.24)\n",
      "Test: [100/196]\tTime  0.377 ( 0.407)\tLoss 2.0936e+00 (1.1612e+00)\tAcc@1  44.53 ( 70.16)\tAcc@5  78.52 ( 90.54)\n",
      "Test: [150/196]\tTime  0.378 ( 0.398)\tLoss 1.4001e+00 (1.2952e+00)\tAcc@1  70.70 ( 67.62)\tAcc@5  85.16 ( 88.63)\n",
      "epoch 1 1.316484202589904 66.7719955444336 0.009000000000000001 2353956 0.10040116859931866\n",
      "Epoch: [2][   0/5005]\tTime  3.046 ( 3.046)\tData  2.487 ( 2.487)\tLoss 1.2791e+00 (1.2791e+00)\n",
      "Epoch: [2][  50/5005]\tTime  0.560 ( 0.608)\tData  0.000 ( 0.049)\tLoss 1.3160e+00 (1.2727e+00)\n",
      "Epoch: [2][ 100/5005]\tTime  0.559 ( 0.584)\tData  0.000 ( 0.025)\tLoss 1.1806e+00 (1.2588e+00)\n",
      "Epoch: [2][ 150/5005]\tTime  0.559 ( 0.576)\tData  0.000 ( 0.017)\tLoss 1.2909e+00 (1.2565e+00)\n",
      "Epoch: [2][ 200/5005]\tTime  0.559 ( 0.572)\tData  0.000 ( 0.013)\tLoss 1.2215e+00 (1.2597e+00)\n",
      "Epoch: [2][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.010)\tLoss 1.3503e+00 (1.2630e+00)\n",
      "Epoch: [2][ 300/5005]\tTime  0.560 ( 0.567)\tData  0.000 ( 0.008)\tLoss 1.2807e+00 (1.2626e+00)\n",
      "Epoch: [2][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.3527e+00 (1.2619e+00)\n",
      "Epoch: [2][ 400/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.1705e+00 (1.2617e+00)\n",
      "Epoch: [2][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.5927e+00 (1.2600e+00)\n",
      "Epoch: [2][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.2129e+00 (1.2600e+00)\n",
      "Epoch: [2][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.1954e+00 (1.2610e+00)\n",
      "Epoch: [2][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1783e+00 (1.2629e+00)\n",
      "Epoch: [2][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1986e+00 (1.2633e+00)\n",
      "Epoch: [2][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1285e+00 (1.2645e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [2][ 750/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.3027e+00 (1.2632e+00)\n",
      "Epoch: [2][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.3378e+00 (1.2627e+00)\n",
      "Epoch: [2][ 850/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2538e+00 (1.2632e+00)\n",
      "Epoch: [2][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2693e+00 (1.2627e+00)\n",
      "Epoch: [2][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2643e+00 (1.2617e+00)\n",
      "Epoch: [2][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2918e+00 (1.2617e+00)\n",
      "Epoch: [2][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.3807e+00 (1.2629e+00)\n",
      "Epoch: [2][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1302e+00 (1.2619e+00)\n",
      "Epoch: [2][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3756e+00 (1.2629e+00)\n",
      "Epoch: [2][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3058e+00 (1.2624e+00)\n",
      "Epoch: [2][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3985e+00 (1.2632e+00)\n",
      "Epoch: [2][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0816e+00 (1.2645e+00)\n",
      "Epoch: [2][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2090e+00 (1.2644e+00)\n",
      "Epoch: [2][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2195e+00 (1.2639e+00)\n",
      "Epoch: [2][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3449e+00 (1.2642e+00)\n",
      "Epoch: [2][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1537e+00 (1.2650e+00)\n",
      "Epoch: [2][1550/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.4046e+00 (1.2650e+00)\n",
      "Epoch: [2][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.4007e+00 (1.2654e+00)\n",
      "Epoch: [2][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0677e+00 (1.2655e+00)\n",
      "Epoch: [2][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1352e+00 (1.2654e+00)\n",
      "Epoch: [2][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0473e+00 (1.2645e+00)\n",
      "Epoch: [2][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.4378e+00 (1.2645e+00)\n",
      "Epoch: [2][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.3687e+00 (1.2650e+00)\n",
      "Epoch: [2][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3933e+00 (1.2642e+00)\n",
      "Epoch: [2][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2653e+00 (1.2641e+00)\n",
      "Epoch: [2][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2836e+00 (1.2646e+00)\n",
      "Epoch: [2][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1768e+00 (1.2647e+00)\n",
      "Epoch: [2][2100/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3814e+00 (1.2642e+00)\n",
      "Epoch: [2][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7718e-01 (1.2645e+00)\n",
      "Epoch: [2][2200/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4245e+00 (1.2652e+00)\n",
      "Epoch: [2][2250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2558e+00 (1.2653e+00)\n",
      "Epoch: [2][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0898e+00 (1.2654e+00)\n",
      "Epoch: [2][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3150e+00 (1.2653e+00)\n",
      "Epoch: [2][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2726e+00 (1.2653e+00)\n",
      "Epoch: [2][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3678e+00 (1.2657e+00)\n",
      "Epoch: [2][2500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2531e+00 (1.2662e+00)\n",
      "Epoch: [2][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2760e+00 (1.2663e+00)\n",
      "Epoch: [2][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4314e+00 (1.2658e+00)\n",
      "Epoch: [2][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3593e+00 (1.2655e+00)\n",
      "Epoch: [2][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4101e+00 (1.2657e+00)\n",
      "Epoch: [2][2750/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0875e+00 (1.2663e+00)\n",
      "Epoch: [2][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2322e+00 (1.2661e+00)\n",
      "Epoch: [2][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0957e+00 (1.2665e+00)\n",
      "Epoch: [2][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2997e+00 (1.2665e+00)\n",
      "Epoch: [2][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0316e+00 (1.2668e+00)\n",
      "Epoch: [2][3000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1346e+00 (1.2668e+00)\n",
      "Epoch: [2][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1741e+00 (1.2669e+00)\n",
      "Epoch: [2][3100/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2760e+00 (1.2670e+00)\n",
      "Epoch: [2][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4341e+00 (1.2671e+00)\n",
      "Epoch: [2][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4194e+00 (1.2669e+00)\n",
      "Epoch: [2][3250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3155e+00 (1.2670e+00)\n",
      "Epoch: [2][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3324e+00 (1.2676e+00)\n",
      "Epoch: [2][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1777e+00 (1.2673e+00)\n",
      "Epoch: [2][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2793e+00 (1.2671e+00)\n",
      "Epoch: [2][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2704e+00 (1.2671e+00)\n",
      "Epoch: [2][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3940e+00 (1.2668e+00)\n",
      "Epoch: [2][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1600e+00 (1.2668e+00)\n",
      "Epoch: [2][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3812e+00 (1.2668e+00)\n",
      "Epoch: [2][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0529e+00 (1.2671e+00)\n",
      "Epoch: [2][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1030e+00 (1.2673e+00)\n",
      "Epoch: [2][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1211e+00 (1.2675e+00)\n",
      "Epoch: [2][3800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4456e+00 (1.2680e+00)\n",
      "Epoch: [2][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4732e+00 (1.2681e+00)\n",
      "Epoch: [2][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1720e+00 (1.2686e+00)\n",
      "Epoch: [2][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0810e+00 (1.2681e+00)\n",
      "Epoch: [2][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1820e+00 (1.2683e+00)\n",
      "Epoch: [2][4050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1613e+00 (1.2682e+00)\n",
      "Epoch: [2][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1936e+00 (1.2679e+00)\n",
      "Epoch: [2][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3325e+00 (1.2678e+00)\n",
      "Epoch: [2][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4712e+00 (1.2679e+00)\n",
      "Epoch: [2][4250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1325e+00 (1.2678e+00)\n",
      "Epoch: [2][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2760e+00 (1.2678e+00)\n",
      "Epoch: [2][4350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4644e+00 (1.2683e+00)\n",
      "Epoch: [2][4400/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2610e+00 (1.2683e+00)\n",
      "Epoch: [2][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2226e+00 (1.2684e+00)\n",
      "Epoch: [2][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.5310e+00 (1.2683e+00)\n",
      "Epoch: [2][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1775e+00 (1.2680e+00)\n",
      "Epoch: [2][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2885e+00 (1.2680e+00)\n",
      "Epoch: [2][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1396e+00 (1.2680e+00)\n",
      "Epoch: [2][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2152e+00 (1.2682e+00)\n",
      "Epoch: [2][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2413e+00 (1.2684e+00)\n",
      "Epoch: [2][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3537e+00 (1.2684e+00)\n",
      "Epoch: [2][4850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0696e+00 (1.2683e+00)\n",
      "Epoch: [2][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3037e+00 (1.2683e+00)\n",
      "Epoch: [2][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1975e+00 (1.2684e+00)\n",
      "Epoch: [2][5000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3855e+00 (1.2687e+00)\n",
      "Test: [  0/196]\tTime  3.398 ( 3.398)\tLoss 8.7397e-01 (8.7397e-01)\tAcc@1  75.00 ( 75.00)\tAcc@5  94.14 ( 94.14)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.436)\tLoss 8.4085e-01 (1.0551e+00)\tAcc@1  77.34 ( 72.40)\tAcc@5  94.53 ( 91.73)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test: [100/196]\tTime  0.377 ( 0.407)\tLoss 1.7339e+00 (1.1925e+00)\tAcc@1  58.59 ( 69.48)\tAcc@5  82.42 ( 90.14)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.3621e+00 (1.3336e+00)\tAcc@1  69.92 ( 66.82)\tAcc@5  84.38 ( 88.06)\n",
      "epoch 2 1.2687634622129418 66.0219955444336 0.0085 2353956 0.10040116859931866\n",
      "Epoch: [3][   0/5005]\tTime  3.278 ( 3.278)\tData  2.713 ( 2.713)\tLoss 1.4356e+00 (1.4356e+00)\n",
      "Epoch: [3][  50/5005]\tTime  0.558 ( 0.612)\tData  0.000 ( 0.053)\tLoss 1.1244e+00 (1.2279e+00)\n",
      "Epoch: [3][ 100/5005]\tTime  0.559 ( 0.586)\tData  0.000 ( 0.027)\tLoss 1.0817e+00 (1.2346e+00)\n",
      "Epoch: [3][ 150/5005]\tTime  0.559 ( 0.577)\tData  0.000 ( 0.018)\tLoss 1.2548e+00 (1.2405e+00)\n",
      "Epoch: [3][ 200/5005]\tTime  0.559 ( 0.573)\tData  0.000 ( 0.014)\tLoss 1.2519e+00 (1.2257e+00)\n",
      "Epoch: [3][ 250/5005]\tTime  0.559 ( 0.570)\tData  0.000 ( 0.011)\tLoss 1.3461e+00 (1.2256e+00)\n",
      "Epoch: [3][ 300/5005]\tTime  0.559 ( 0.568)\tData  0.000 ( 0.009)\tLoss 1.3740e+00 (1.2235e+00)\n",
      "Epoch: [3][ 350/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 1.2226e+00 (1.2209e+00)\n",
      "Epoch: [3][ 400/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.1769e+00 (1.2233e+00)\n",
      "Epoch: [3][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.1405e+00 (1.2232e+00)\n",
      "Epoch: [3][ 500/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.2233e+00 (1.2211e+00)\n",
      "Epoch: [3][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.2874e+00 (1.2193e+00)\n",
      "Epoch: [3][ 600/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.1207e+00 (1.2204e+00)\n",
      "Epoch: [3][ 650/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.3972e+00 (1.2231e+00)\n",
      "Epoch: [3][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0995e+00 (1.2217e+00)\n",
      "Epoch: [3][ 750/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0753e+00 (1.2218e+00)\n",
      "Epoch: [3][ 800/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.4075e+00 (1.2223e+00)\n",
      "Epoch: [3][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1560e+00 (1.2234e+00)\n",
      "Epoch: [3][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2392e+00 (1.2232e+00)\n",
      "Epoch: [3][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.3311e+00 (1.2229e+00)\n",
      "Epoch: [3][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.4026e+00 (1.2231e+00)\n",
      "Epoch: [3][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0603e+00 (1.2222e+00)\n",
      "Epoch: [3][1100/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1356e+00 (1.2234e+00)\n",
      "Epoch: [3][1150/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.3159e+00 (1.2228e+00)\n",
      "Epoch: [3][1200/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3076e+00 (1.2234e+00)\n",
      "Epoch: [3][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2736e+00 (1.2240e+00)\n",
      "Epoch: [3][1300/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0836e+00 (1.2247e+00)\n",
      "Epoch: [3][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2820e+00 (1.2251e+00)\n",
      "Epoch: [3][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3267e+00 (1.2257e+00)\n",
      "Epoch: [3][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1353e+00 (1.2251e+00)\n",
      "Epoch: [3][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2879e+00 (1.2258e+00)\n",
      "Epoch: [3][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2645e+00 (1.2257e+00)\n",
      "Epoch: [3][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1946e+00 (1.2269e+00)\n",
      "Epoch: [3][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1420e+00 (1.2271e+00)\n",
      "Epoch: [3][1700/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.4913e+00 (1.2277e+00)\n",
      "Epoch: [3][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1962e+00 (1.2283e+00)\n",
      "Epoch: [3][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3702e+00 (1.2280e+00)\n",
      "Epoch: [3][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3316e+00 (1.2278e+00)\n",
      "Epoch: [3][1900/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2254e+00 (1.2283e+00)\n",
      "Epoch: [3][1950/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1879e+00 (1.2283e+00)\n",
      "Epoch: [3][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.2133e+00 (1.2285e+00)\n",
      "Epoch: [3][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.2382e+00 (1.2289e+00)\n",
      "Epoch: [3][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3214e+00 (1.2296e+00)\n",
      "Epoch: [3][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0333e+00 (1.2296e+00)\n",
      "Epoch: [3][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1551e+00 (1.2300e+00)\n",
      "Epoch: [3][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2709e+00 (1.2300e+00)\n",
      "Epoch: [3][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3370e+00 (1.2303e+00)\n",
      "Epoch: [3][2350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2306e+00 (1.2307e+00)\n",
      "Epoch: [3][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2631e+00 (1.2308e+00)\n",
      "Epoch: [3][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3027e+00 (1.2310e+00)\n",
      "Epoch: [3][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4045e+00 (1.2314e+00)\n",
      "Epoch: [3][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2060e+00 (1.2318e+00)\n",
      "Epoch: [3][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.6010e+00 (1.2318e+00)\n",
      "Epoch: [3][2650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2038e+00 (1.2315e+00)\n",
      "Epoch: [3][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2815e+00 (1.2323e+00)\n",
      "Epoch: [3][2750/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1641e+00 (1.2325e+00)\n",
      "Epoch: [3][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4390e+00 (1.2332e+00)\n",
      "Epoch: [3][2850/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2095e+00 (1.2334e+00)\n",
      "Epoch: [3][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0982e+00 (1.2332e+00)\n",
      "Epoch: [3][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2628e+00 (1.2330e+00)\n",
      "Epoch: [3][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4749e+00 (1.2328e+00)\n",
      "Epoch: [3][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3004e+00 (1.2330e+00)\n",
      "Epoch: [3][3100/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4540e+00 (1.2335e+00)\n",
      "Epoch: [3][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2988e+00 (1.2337e+00)\n",
      "Epoch: [3][3200/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0827e+00 (1.2340e+00)\n",
      "Epoch: [3][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1177e+00 (1.2343e+00)\n",
      "Epoch: [3][3300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2672e+00 (1.2339e+00)\n",
      "Epoch: [3][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3413e+00 (1.2339e+00)\n",
      "Epoch: [3][3400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2598e+00 (1.2342e+00)\n",
      "Epoch: [3][3450/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2864e+00 (1.2345e+00)\n",
      "Epoch: [3][3500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2834e+00 (1.2347e+00)\n",
      "Epoch: [3][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0861e+00 (1.2348e+00)\n",
      "Epoch: [3][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3770e+00 (1.2351e+00)\n",
      "Epoch: [3][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1434e+00 (1.2351e+00)\n",
      "Epoch: [3][3700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2116e+00 (1.2352e+00)\n",
      "Epoch: [3][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3325e+00 (1.2355e+00)\n",
      "Epoch: [3][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1174e+00 (1.2361e+00)\n",
      "Epoch: [3][3850/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2974e+00 (1.2359e+00)\n",
      "Epoch: [3][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2757e+00 (1.2357e+00)\n",
      "Epoch: [3][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0726e+00 (1.2362e+00)\n",
      "Epoch: [3][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3265e+00 (1.2364e+00)\n",
      "Epoch: [3][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2860e+00 (1.2363e+00)\n",
      "Epoch: [3][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3415e+00 (1.2361e+00)\n",
      "Epoch: [3][4150/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0134e+00 (1.2362e+00)\n",
      "Epoch: [3][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1142e+00 (1.2365e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [3][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1738e+00 (1.2364e+00)\n",
      "Epoch: [3][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1273e+00 (1.2366e+00)\n",
      "Epoch: [3][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2666e+00 (1.2365e+00)\n",
      "Epoch: [3][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3989e+00 (1.2365e+00)\n",
      "Epoch: [3][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4492e+00 (1.2365e+00)\n",
      "Epoch: [3][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0786e+00 (1.2364e+00)\n",
      "Epoch: [3][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3811e+00 (1.2363e+00)\n",
      "Epoch: [3][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2835e+00 (1.2362e+00)\n",
      "Epoch: [3][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3651e+00 (1.2362e+00)\n",
      "Epoch: [3][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2580e+00 (1.2363e+00)\n",
      "Epoch: [3][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3023e+00 (1.2363e+00)\n",
      "Epoch: [3][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4067e+00 (1.2363e+00)\n",
      "Epoch: [3][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4648e+00 (1.2364e+00)\n",
      "Epoch: [3][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2898e+00 (1.2366e+00)\n",
      "Epoch: [3][4950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3489e+00 (1.2367e+00)\n",
      "Epoch: [3][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2140e+00 (1.2367e+00)\n",
      "Test: [  0/196]\tTime  3.514 ( 3.514)\tLoss 7.4731e-01 (7.4731e-01)\tAcc@1  78.52 ( 78.52)\tAcc@5  94.92 ( 94.92)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.438)\tLoss 6.4607e-01 (9.4127e-01)\tAcc@1  84.38 ( 74.96)\tAcc@5  95.70 ( 93.10)\n",
      "Test: [100/196]\tTime  0.378 ( 0.408)\tLoss 1.9000e+00 (1.0885e+00)\tAcc@1  50.78 ( 71.88)\tAcc@5  80.86 ( 91.39)\n",
      "Test: [150/196]\tTime  0.378 ( 0.398)\tLoss 1.3412e+00 (1.2177e+00)\tAcc@1  71.88 ( 69.48)\tAcc@5  85.55 ( 89.61)\n",
      "epoch 3 1.2367926652686176 68.38800048828125 0.008000000000000002 2353956 0.10040116859931866\n",
      "Epoch: [4][   0/5005]\tTime  2.992 ( 2.992)\tData  2.427 ( 2.427)\tLoss 9.7877e-01 (9.7877e-01)\n",
      "Epoch: [4][  50/5005]\tTime  0.560 ( 0.607)\tData  0.000 ( 0.048)\tLoss 1.1181e+00 (1.1804e+00)\n",
      "Epoch: [4][ 100/5005]\tTime  0.560 ( 0.584)\tData  0.000 ( 0.024)\tLoss 1.0581e+00 (1.1789e+00)\n",
      "Epoch: [4][ 150/5005]\tTime  0.559 ( 0.576)\tData  0.000 ( 0.016)\tLoss 1.1698e+00 (1.1818e+00)\n",
      "Epoch: [4][ 200/5005]\tTime  0.559 ( 0.572)\tData  0.000 ( 0.012)\tLoss 1.2068e+00 (1.1806e+00)\n",
      "Epoch: [4][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.010)\tLoss 1.2321e+00 (1.1798e+00)\n",
      "Epoch: [4][ 300/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 1.2382e+00 (1.1857e+00)\n",
      "Epoch: [4][ 350/5005]\tTime  0.560 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.2480e+00 (1.1876e+00)\n",
      "Epoch: [4][ 400/5005]\tTime  0.560 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.2295e+00 (1.1916e+00)\n",
      "Epoch: [4][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.0650e+00 (1.1934e+00)\n",
      "Epoch: [4][ 500/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.3510e+00 (1.1930e+00)\n",
      "Epoch: [4][ 550/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.1987e-01 (1.1937e+00)\n",
      "Epoch: [4][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.4077e+00 (1.1938e+00)\n",
      "Epoch: [4][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1175e+00 (1.1947e+00)\n",
      "Epoch: [4][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0197e+00 (1.1978e+00)\n",
      "Epoch: [4][ 750/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.003)\tLoss 1.4075e+00 (1.1959e+00)\n",
      "Epoch: [4][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2099e+00 (1.1960e+00)\n",
      "Epoch: [4][ 850/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0803e+00 (1.1956e+00)\n",
      "Epoch: [4][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.3231e+00 (1.1968e+00)\n",
      "Epoch: [4][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2400e+00 (1.1961e+00)\n",
      "Epoch: [4][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.3609e+00 (1.1973e+00)\n",
      "Epoch: [4][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1280e+00 (1.1973e+00)\n",
      "Epoch: [4][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1993e+00 (1.1974e+00)\n",
      "Epoch: [4][1150/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3157e+00 (1.1977e+00)\n",
      "Epoch: [4][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1802e+00 (1.1979e+00)\n",
      "Epoch: [4][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1307e+00 (1.1996e+00)\n",
      "Epoch: [4][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2766e+00 (1.2000e+00)\n",
      "Epoch: [4][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3656e+00 (1.2005e+00)\n",
      "Epoch: [4][1400/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2001e+00 (1.2009e+00)\n",
      "Epoch: [4][1450/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2164e+00 (1.2010e+00)\n",
      "Epoch: [4][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2236e+00 (1.2016e+00)\n",
      "Epoch: [4][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.8431e-01 (1.2013e+00)\n",
      "Epoch: [4][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1707e+00 (1.2018e+00)\n",
      "Epoch: [4][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0518e+00 (1.2013e+00)\n",
      "Epoch: [4][1700/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2486e+00 (1.2013e+00)\n",
      "Epoch: [4][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0574e+00 (1.2011e+00)\n",
      "Epoch: [4][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1830e+00 (1.2010e+00)\n",
      "Epoch: [4][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1987e+00 (1.2005e+00)\n",
      "Epoch: [4][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1330e+00 (1.2008e+00)\n",
      "Epoch: [4][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1843e+00 (1.2004e+00)\n",
      "Epoch: [4][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2386e+00 (1.2004e+00)\n",
      "Epoch: [4][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1707e+00 (1.2008e+00)\n",
      "Epoch: [4][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0440e+00 (1.2010e+00)\n",
      "Epoch: [4][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2390e+00 (1.2010e+00)\n",
      "Epoch: [4][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0865e+00 (1.2008e+00)\n",
      "Epoch: [4][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1373e+00 (1.2009e+00)\n",
      "Epoch: [4][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2883e+00 (1.2015e+00)\n",
      "Epoch: [4][2350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.5097e+00 (1.2017e+00)\n",
      "Epoch: [4][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3035e+00 (1.2018e+00)\n",
      "Epoch: [4][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3145e+00 (1.2018e+00)\n",
      "Epoch: [4][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4003e+00 (1.2018e+00)\n",
      "Epoch: [4][2550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0696e+00 (1.2016e+00)\n",
      "Epoch: [4][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1946e+00 (1.2012e+00)\n",
      "Epoch: [4][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1854e+00 (1.2019e+00)\n",
      "Epoch: [4][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3562e+00 (1.2018e+00)\n",
      "Epoch: [4][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4483e+00 (1.2023e+00)\n",
      "Epoch: [4][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0219e+00 (1.2026e+00)\n",
      "Epoch: [4][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1380e+00 (1.2029e+00)\n",
      "Epoch: [4][2900/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2334e+00 (1.2031e+00)\n",
      "Epoch: [4][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3365e+00 (1.2030e+00)\n",
      "Epoch: [4][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1869e+00 (1.2031e+00)\n",
      "Epoch: [4][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2957e+00 (1.2037e+00)\n",
      "Epoch: [4][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2252e+00 (1.2045e+00)\n",
      "Epoch: [4][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3504e+00 (1.2050e+00)\n",
      "Epoch: [4][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1633e+00 (1.2050e+00)\n",
      "Epoch: [4][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0800e+00 (1.2048e+00)\n",
      "Epoch: [4][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1948e+00 (1.2050e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [4][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0611e+00 (1.2050e+00)\n",
      "Epoch: [4][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1410e+00 (1.2052e+00)\n",
      "Epoch: [4][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3577e+00 (1.2051e+00)\n",
      "Epoch: [4][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1849e+00 (1.2049e+00)\n",
      "Epoch: [4][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1206e+00 (1.2048e+00)\n",
      "Epoch: [4][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0442e+00 (1.2048e+00)\n",
      "Epoch: [4][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3940e+00 (1.2050e+00)\n",
      "Epoch: [4][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1926e+00 (1.2052e+00)\n",
      "Epoch: [4][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1362e+00 (1.2053e+00)\n",
      "Epoch: [4][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1744e+00 (1.2057e+00)\n",
      "Epoch: [4][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1817e+00 (1.2056e+00)\n",
      "Epoch: [4][3900/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2453e+00 (1.2056e+00)\n",
      "Epoch: [4][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0901e+00 (1.2055e+00)\n",
      "Epoch: [4][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1208e+00 (1.2058e+00)\n",
      "Epoch: [4][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2697e+00 (1.2062e+00)\n",
      "Epoch: [4][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1250e+00 (1.2062e+00)\n",
      "Epoch: [4][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3268e+00 (1.2063e+00)\n",
      "Epoch: [4][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2720e+00 (1.2062e+00)\n",
      "Epoch: [4][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2629e+00 (1.2062e+00)\n",
      "Epoch: [4][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2074e+00 (1.2064e+00)\n",
      "Epoch: [4][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2169e+00 (1.2065e+00)\n",
      "Epoch: [4][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6248e-01 (1.2063e+00)\n",
      "Epoch: [4][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3625e+00 (1.2065e+00)\n",
      "Epoch: [4][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2356e+00 (1.2065e+00)\n",
      "Epoch: [4][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2766e+00 (1.2066e+00)\n",
      "Epoch: [4][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0906e+00 (1.2068e+00)\n",
      "Epoch: [4][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3250e+00 (1.2069e+00)\n",
      "Epoch: [4][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3595e+00 (1.2072e+00)\n",
      "Epoch: [4][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2860e+00 (1.2074e+00)\n",
      "Epoch: [4][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3153e+00 (1.2076e+00)\n",
      "Epoch: [4][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2398e+00 (1.2079e+00)\n",
      "Epoch: [4][4900/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4809e+00 (1.2081e+00)\n",
      "Epoch: [4][4950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2431e+00 (1.2081e+00)\n",
      "Epoch: [4][5000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1908e+00 (1.2084e+00)\n",
      "Test: [  0/196]\tTime  3.318 ( 3.318)\tLoss 7.7605e-01 (7.7605e-01)\tAcc@1  77.73 ( 77.73)\tAcc@5  95.70 ( 95.70)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 7.9836e-01 (9.3014e-01)\tAcc@1  79.30 ( 75.05)\tAcc@5  94.92 ( 93.31)\n",
      "Test: [100/196]\tTime  0.378 ( 0.406)\tLoss 1.7298e+00 (1.0742e+00)\tAcc@1  53.52 ( 71.98)\tAcc@5  85.94 ( 91.65)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 1.2420e+00 (1.2132e+00)\tAcc@1  71.88 ( 69.53)\tAcc@5  88.67 ( 89.65)\n",
      "epoch 4 1.2083946833857635 68.63999938964844 0.0075000000000000015 2353956 0.10040116859931866\n",
      "Epoch: [5][   0/5005]\tTime  2.940 ( 2.940)\tData  2.377 ( 2.377)\tLoss 1.0718e+00 (1.0718e+00)\n",
      "Epoch: [5][  50/5005]\tTime  0.559 ( 0.606)\tData  0.000 ( 0.047)\tLoss 1.1886e+00 (1.1741e+00)\n",
      "Epoch: [5][ 100/5005]\tTime  0.560 ( 0.583)\tData  0.000 ( 0.024)\tLoss 1.0609e+00 (1.1780e+00)\n",
      "Epoch: [5][ 150/5005]\tTime  0.559 ( 0.575)\tData  0.000 ( 0.016)\tLoss 1.0019e+00 (1.1736e+00)\n",
      "Epoch: [5][ 200/5005]\tTime  0.559 ( 0.571)\tData  0.000 ( 0.012)\tLoss 1.1550e+00 (1.1719e+00)\n",
      "Epoch: [5][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.010)\tLoss 1.2109e+00 (1.1736e+00)\n",
      "Epoch: [5][ 300/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 1.1840e+00 (1.1728e+00)\n",
      "Epoch: [5][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.1281e+00 (1.1730e+00)\n",
      "Epoch: [5][ 400/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.2550e+00 (1.1741e+00)\n",
      "Epoch: [5][ 450/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.1960e+00 (1.1738e+00)\n",
      "Epoch: [5][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.3531e+00 (1.1747e+00)\n",
      "Epoch: [5][ 550/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.3031e+00 (1.1762e+00)\n",
      "Epoch: [5][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.2410e+00 (1.1787e+00)\n",
      "Epoch: [5][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0582e+00 (1.1808e+00)\n",
      "Epoch: [5][ 700/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.2477e+00 (1.1789e+00)\n",
      "Epoch: [5][ 750/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1069e+00 (1.1799e+00)\n",
      "Epoch: [5][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.8368e-01 (1.1788e+00)\n",
      "Epoch: [5][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2024e+00 (1.1784e+00)\n",
      "Epoch: [5][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2871e+00 (1.1803e+00)\n",
      "Epoch: [5][ 950/5005]\tTime  0.563 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1016e+00 (1.1803e+00)\n",
      "Epoch: [5][1000/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1379e+00 (1.1795e+00)\n",
      "Epoch: [5][1050/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3854e+00 (1.1808e+00)\n",
      "Epoch: [5][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1228e+00 (1.1793e+00)\n",
      "Epoch: [5][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1300e+00 (1.1795e+00)\n",
      "Epoch: [5][1200/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1483e+00 (1.1791e+00)\n",
      "Epoch: [5][1250/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2740e+00 (1.1790e+00)\n",
      "Epoch: [5][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1887e+00 (1.1790e+00)\n",
      "Epoch: [5][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2200e+00 (1.1784e+00)\n",
      "Epoch: [5][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0816e+00 (1.1775e+00)\n",
      "Epoch: [5][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2560e+00 (1.1785e+00)\n",
      "Epoch: [5][1500/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.4100e+00 (1.1790e+00)\n",
      "Epoch: [5][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1348e+00 (1.1789e+00)\n",
      "Epoch: [5][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2001e+00 (1.1793e+00)\n",
      "Epoch: [5][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2037e+00 (1.1794e+00)\n",
      "Epoch: [5][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1101e+00 (1.1795e+00)\n",
      "Epoch: [5][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3382e+00 (1.1797e+00)\n",
      "Epoch: [5][1800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0265e+00 (1.1795e+00)\n",
      "Epoch: [5][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2084e+00 (1.1794e+00)\n",
      "Epoch: [5][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2573e+00 (1.1795e+00)\n",
      "Epoch: [5][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2702e+00 (1.1796e+00)\n",
      "Epoch: [5][2000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3630e-01 (1.1795e+00)\n",
      "Epoch: [5][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0483e+00 (1.1795e+00)\n",
      "Epoch: [5][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0996e+00 (1.1798e+00)\n",
      "Epoch: [5][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2171e+00 (1.1800e+00)\n",
      "Epoch: [5][2200/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2095e+00 (1.1804e+00)\n",
      "Epoch: [5][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2757e+00 (1.1809e+00)\n",
      "Epoch: [5][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1930e+00 (1.1812e+00)\n",
      "Epoch: [5][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2659e+00 (1.1816e+00)\n",
      "Epoch: [5][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1076e+00 (1.1824e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [5][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0458e+00 (1.1827e+00)\n",
      "Epoch: [5][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3112e+00 (1.1829e+00)\n",
      "Epoch: [5][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0029e+00 (1.1829e+00)\n",
      "Epoch: [5][2600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1968e+00 (1.1835e+00)\n",
      "Epoch: [5][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1211e+00 (1.1838e+00)\n",
      "Epoch: [5][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3254e+00 (1.1843e+00)\n",
      "Epoch: [5][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1805e+00 (1.1847e+00)\n",
      "Epoch: [5][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3200e+00 (1.1849e+00)\n",
      "Epoch: [5][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0836e+00 (1.1850e+00)\n",
      "Epoch: [5][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3514e+00 (1.1849e+00)\n",
      "Epoch: [5][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0837e+00 (1.1850e+00)\n",
      "Epoch: [5][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1684e+00 (1.1852e+00)\n",
      "Epoch: [5][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3379e+00 (1.1851e+00)\n",
      "Epoch: [5][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1135e+00 (1.1846e+00)\n",
      "Epoch: [5][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2486e+00 (1.1844e+00)\n",
      "Epoch: [5][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3290e+00 (1.1845e+00)\n",
      "Epoch: [5][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2759e+00 (1.1847e+00)\n",
      "Epoch: [5][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1514e+00 (1.1849e+00)\n",
      "Epoch: [5][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2350e+00 (1.1849e+00)\n",
      "Epoch: [5][3400/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3004e+00 (1.1850e+00)\n",
      "Epoch: [5][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0315e+00 (1.1846e+00)\n",
      "Epoch: [5][3500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0273e+00 (1.1848e+00)\n",
      "Epoch: [5][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3027e+00 (1.1852e+00)\n",
      "Epoch: [5][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2393e+00 (1.1853e+00)\n",
      "Epoch: [5][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1532e+00 (1.1855e+00)\n",
      "Epoch: [5][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2236e+00 (1.1855e+00)\n",
      "Epoch: [5][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1858e+00 (1.1855e+00)\n",
      "Epoch: [5][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1626e+00 (1.1857e+00)\n",
      "Epoch: [5][3850/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1527e+00 (1.1855e+00)\n",
      "Epoch: [5][3900/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0259e+00 (1.1855e+00)\n",
      "Epoch: [5][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2540e+00 (1.1857e+00)\n",
      "Epoch: [5][4000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1563e+00 (1.1861e+00)\n",
      "Epoch: [5][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2594e+00 (1.1863e+00)\n",
      "Epoch: [5][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1876e+00 (1.1867e+00)\n",
      "Epoch: [5][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0217e+00 (1.1868e+00)\n",
      "Epoch: [5][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2297e+00 (1.1869e+00)\n",
      "Epoch: [5][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2812e+00 (1.1869e+00)\n",
      "Epoch: [5][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0579e+00 (1.1868e+00)\n",
      "Epoch: [5][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2408e+00 (1.1871e+00)\n",
      "Epoch: [5][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2078e+00 (1.1871e+00)\n",
      "Epoch: [5][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1665e+00 (1.1873e+00)\n",
      "Epoch: [5][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3519e+00 (1.1875e+00)\n",
      "Epoch: [5][4550/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1920e+00 (1.1878e+00)\n",
      "Epoch: [5][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1699e+00 (1.1878e+00)\n",
      "Epoch: [5][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2316e+00 (1.1878e+00)\n",
      "Epoch: [5][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2237e+00 (1.1877e+00)\n",
      "Epoch: [5][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2038e+00 (1.1880e+00)\n",
      "Epoch: [5][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3490e+00 (1.1879e+00)\n",
      "Epoch: [5][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2576e+00 (1.1882e+00)\n",
      "Epoch: [5][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3035e+00 (1.1880e+00)\n",
      "Epoch: [5][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1792e+00 (1.1882e+00)\n",
      "Epoch: [5][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1698e+00 (1.1880e+00)\n",
      "Test: [  0/196]\tTime  3.421 ( 3.421)\tLoss 6.8640e-01 (6.8640e-01)\tAcc@1  80.86 ( 80.86)\tAcc@5  95.70 ( 95.70)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.437)\tLoss 6.5155e-01 (9.7346e-01)\tAcc@1  83.59 ( 74.26)\tAcc@5  96.48 ( 92.91)\n",
      "Test: [100/196]\tTime  0.378 ( 0.407)\tLoss 1.7053e+00 (1.1063e+00)\tAcc@1  54.30 ( 71.52)\tAcc@5  84.77 ( 91.39)\n",
      "Test: [150/196]\tTime  0.378 ( 0.398)\tLoss 1.3118e+00 (1.2390e+00)\tAcc@1  71.48 ( 69.17)\tAcc@5  84.38 ( 89.34)\n",
      "epoch 5 1.188000334438239 68.18999481201172 0.007000000000000001 2353956 0.10040116859931866\n",
      "Epoch: [6][   0/5005]\tTime  3.135 ( 3.135)\tData  2.575 ( 2.575)\tLoss 1.0830e+00 (1.0830e+00)\n",
      "Epoch: [6][  50/5005]\tTime  0.558 ( 0.609)\tData  0.000 ( 0.051)\tLoss 1.1886e+00 (1.1783e+00)\n",
      "Epoch: [6][ 100/5005]\tTime  0.558 ( 0.584)\tData  0.000 ( 0.026)\tLoss 1.0606e+00 (1.1624e+00)\n",
      "Epoch: [6][ 150/5005]\tTime  0.558 ( 0.576)\tData  0.000 ( 0.017)\tLoss 1.1289e+00 (1.1559e+00)\n",
      "Epoch: [6][ 200/5005]\tTime  0.559 ( 0.572)\tData  0.000 ( 0.013)\tLoss 1.1027e+00 (1.1588e+00)\n",
      "Epoch: [6][ 250/5005]\tTime  0.560 ( 0.569)\tData  0.000 ( 0.010)\tLoss 1.3085e+00 (1.1570e+00)\n",
      "Epoch: [6][ 300/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.009)\tLoss 1.1219e+00 (1.1574e+00)\n",
      "Epoch: [6][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.008)\tLoss 1.2402e+00 (1.1561e+00)\n",
      "Epoch: [6][ 400/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.007)\tLoss 1.1618e+00 (1.1573e+00)\n",
      "Epoch: [6][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 9.9671e-01 (1.1554e+00)\n",
      "Epoch: [6][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.3009e+00 (1.1547e+00)\n",
      "Epoch: [6][ 550/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.2254e+00 (1.1566e+00)\n",
      "Epoch: [6][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0242e+00 (1.1561e+00)\n",
      "Epoch: [6][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1805e+00 (1.1574e+00)\n",
      "Epoch: [6][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.2274e+00 (1.1563e+00)\n",
      "Epoch: [6][ 750/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.3852e+00 (1.1568e+00)\n",
      "Epoch: [6][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0436e+00 (1.1565e+00)\n",
      "Epoch: [6][ 850/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2081e+00 (1.1567e+00)\n",
      "Epoch: [6][ 900/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2130e+00 (1.1560e+00)\n",
      "Epoch: [6][ 950/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1950e+00 (1.1567e+00)\n",
      "Epoch: [6][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2633e+00 (1.1587e+00)\n",
      "Epoch: [6][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1198e+00 (1.1578e+00)\n",
      "Epoch: [6][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.003)\tLoss 1.1586e+00 (1.1583e+00)\n",
      "Epoch: [6][1150/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1701e+00 (1.1575e+00)\n",
      "Epoch: [6][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5339e-01 (1.1574e+00)\n",
      "Epoch: [6][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1402e+00 (1.1581e+00)\n",
      "Epoch: [6][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0202e+00 (1.1573e+00)\n",
      "Epoch: [6][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2690e+00 (1.1581e+00)\n",
      "Epoch: [6][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1972e+00 (1.1582e+00)\n",
      "Epoch: [6][1450/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.4130e+00 (1.1591e+00)\n",
      "Epoch: [6][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0933e+00 (1.1591e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [6][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2779e+00 (1.1592e+00)\n",
      "Epoch: [6][1600/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1900e+00 (1.1579e+00)\n",
      "Epoch: [6][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0797e+00 (1.1580e+00)\n",
      "Epoch: [6][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1289e+00 (1.1573e+00)\n",
      "Epoch: [6][1750/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0323e+00 (1.1573e+00)\n",
      "Epoch: [6][1800/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0358e+00 (1.1572e+00)\n",
      "Epoch: [6][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0998e+00 (1.1581e+00)\n",
      "Epoch: [6][1900/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3044e+00 (1.1584e+00)\n",
      "Epoch: [6][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.2267e+00 (1.1584e+00)\n",
      "Epoch: [6][2000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2579e+00 (1.1585e+00)\n",
      "Epoch: [6][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1342e+00 (1.1581e+00)\n",
      "Epoch: [6][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2032e+00 (1.1584e+00)\n",
      "Epoch: [6][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2933e+00 (1.1585e+00)\n",
      "Epoch: [6][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1597e+00 (1.1587e+00)\n",
      "Epoch: [6][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0486e+00 (1.1592e+00)\n",
      "Epoch: [6][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0042e+00 (1.1593e+00)\n",
      "Epoch: [6][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2162e+00 (1.1595e+00)\n",
      "Epoch: [6][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0955e+00 (1.1598e+00)\n",
      "Epoch: [6][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0864e+00 (1.1599e+00)\n",
      "Epoch: [6][2500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1609e+00 (1.1604e+00)\n",
      "Epoch: [6][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1657e+00 (1.1608e+00)\n",
      "Epoch: [6][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3924e-01 (1.1610e+00)\n",
      "Epoch: [6][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2026e+00 (1.1615e+00)\n",
      "Epoch: [6][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2001e+00 (1.1614e+00)\n",
      "Epoch: [6][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0194e+00 (1.1613e+00)\n",
      "Epoch: [6][2800/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1274e+00 (1.1610e+00)\n",
      "Epoch: [6][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2261e+00 (1.1615e+00)\n",
      "Epoch: [6][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1851e+00 (1.1618e+00)\n",
      "Epoch: [6][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0741e+00 (1.1620e+00)\n",
      "Epoch: [6][3000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0957e+00 (1.1623e+00)\n",
      "Epoch: [6][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.4022e+00 (1.1626e+00)\n",
      "Epoch: [6][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2307e+00 (1.1626e+00)\n",
      "Epoch: [6][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1526e+00 (1.1627e+00)\n",
      "Epoch: [6][3200/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1360e+00 (1.1631e+00)\n",
      "Epoch: [6][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2131e+00 (1.1630e+00)\n",
      "Epoch: [6][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2520e+00 (1.1632e+00)\n",
      "Epoch: [6][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3283e+00 (1.1635e+00)\n",
      "Epoch: [6][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2810e+00 (1.1635e+00)\n",
      "Epoch: [6][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1876e+00 (1.1632e+00)\n",
      "Epoch: [6][3500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0569e+00 (1.1631e+00)\n",
      "Epoch: [6][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3755e+00 (1.1632e+00)\n",
      "Epoch: [6][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1119e+00 (1.1633e+00)\n",
      "Epoch: [6][3650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1922e+00 (1.1640e+00)\n",
      "Epoch: [6][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1336e+00 (1.1641e+00)\n",
      "Epoch: [6][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0369e+00 (1.1643e+00)\n",
      "Epoch: [6][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2899e+00 (1.1645e+00)\n",
      "Epoch: [6][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0997e+00 (1.1646e+00)\n",
      "Epoch: [6][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1852e+00 (1.1647e+00)\n",
      "Epoch: [6][3950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0807e+00 (1.1644e+00)\n",
      "Epoch: [6][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3246e+00 (1.1647e+00)\n",
      "Epoch: [6][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2579e+00 (1.1648e+00)\n",
      "Epoch: [6][4100/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2094e+00 (1.1650e+00)\n",
      "Epoch: [6][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3062e+00 (1.1652e+00)\n",
      "Epoch: [6][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1652e+00 (1.1654e+00)\n",
      "Epoch: [6][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2782e+00 (1.1655e+00)\n",
      "Epoch: [6][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1876e+00 (1.1656e+00)\n",
      "Epoch: [6][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1695e+00 (1.1659e+00)\n",
      "Epoch: [6][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1437e+00 (1.1661e+00)\n",
      "Epoch: [6][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1694e+00 (1.1661e+00)\n",
      "Epoch: [6][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0379e+00 (1.1664e+00)\n",
      "Epoch: [6][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1288e+00 (1.1667e+00)\n",
      "Epoch: [6][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2268e+00 (1.1667e+00)\n",
      "Epoch: [6][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0787e+00 (1.1671e+00)\n",
      "Epoch: [6][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2909e+00 (1.1669e+00)\n",
      "Epoch: [6][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0896e+00 (1.1670e+00)\n",
      "Epoch: [6][4800/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0916e+00 (1.1674e+00)\n",
      "Epoch: [6][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1153e+00 (1.1677e+00)\n",
      "Epoch: [6][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0909e+00 (1.1676e+00)\n",
      "Epoch: [6][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1281e+00 (1.1677e+00)\n",
      "Epoch: [6][5000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8545e-01 (1.1677e+00)\n",
      "Test: [  0/196]\tTime  3.315 ( 3.315)\tLoss 7.1708e-01 (7.1708e-01)\tAcc@1  79.69 ( 79.69)\tAcc@5  95.70 ( 95.70)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 6.3532e-01 (9.1571e-01)\tAcc@1  84.38 ( 75.43)\tAcc@5  96.48 ( 93.50)\n",
      "Test: [100/196]\tTime  0.378 ( 0.406)\tLoss 1.7822e+00 (1.0679e+00)\tAcc@1  49.22 ( 72.17)\tAcc@5  82.42 ( 91.77)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.2519e+00 (1.1910e+00)\tAcc@1  75.39 ( 69.86)\tAcc@5  88.28 ( 89.94)\n",
      "epoch 6 1.167731972890424 69.08599853515625 0.006500000000000002 2353956 0.10040116859931866\n",
      "Epoch: [7][   0/5005]\tTime  3.264 ( 3.264)\tData  2.702 ( 2.702)\tLoss 1.1002e+00 (1.1002e+00)\n",
      "Epoch: [7][  50/5005]\tTime  0.560 ( 0.612)\tData  0.000 ( 0.053)\tLoss 1.1047e+00 (1.1433e+00)\n",
      "Epoch: [7][ 100/5005]\tTime  0.560 ( 0.586)\tData  0.000 ( 0.027)\tLoss 1.0896e+00 (1.1382e+00)\n",
      "Epoch: [7][ 150/5005]\tTime  0.559 ( 0.577)\tData  0.000 ( 0.018)\tLoss 1.1607e+00 (1.1358e+00)\n",
      "Epoch: [7][ 200/5005]\tTime  0.560 ( 0.573)\tData  0.000 ( 0.014)\tLoss 1.1621e+00 (1.1352e+00)\n",
      "Epoch: [7][ 250/5005]\tTime  0.559 ( 0.570)\tData  0.000 ( 0.011)\tLoss 1.2168e+00 (1.1361e+00)\n",
      "Epoch: [7][ 300/5005]\tTime  0.559 ( 0.568)\tData  0.000 ( 0.009)\tLoss 1.0981e+00 (1.1330e+00)\n",
      "Epoch: [7][ 350/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 1.1205e+00 (1.1330e+00)\n",
      "Epoch: [7][ 400/5005]\tTime  0.560 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.1440e+00 (1.1311e+00)\n",
      "Epoch: [7][ 450/5005]\tTime  0.560 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.1567e+00 (1.1295e+00)\n",
      "Epoch: [7][ 500/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.1422e+00 (1.1332e+00)\n",
      "Epoch: [7][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0563e+00 (1.1341e+00)\n",
      "Epoch: [7][ 600/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.2656e+00 (1.1321e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [7][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0122e+00 (1.1326e+00)\n",
      "Epoch: [7][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1959e+00 (1.1346e+00)\n",
      "Epoch: [7][ 750/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0631e+00 (1.1343e+00)\n",
      "Epoch: [7][ 800/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0941e+00 (1.1353e+00)\n",
      "Epoch: [7][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.3405e-01 (1.1331e+00)\n",
      "Epoch: [7][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1820e+00 (1.1327e+00)\n",
      "Epoch: [7][ 950/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0807e+00 (1.1351e+00)\n",
      "Epoch: [7][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0538e+00 (1.1368e+00)\n",
      "Epoch: [7][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1302e+00 (1.1372e+00)\n",
      "Epoch: [7][1100/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1393e+00 (1.1382e+00)\n",
      "Epoch: [7][1150/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2211e+00 (1.1389e+00)\n",
      "Epoch: [7][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1279e+00 (1.1384e+00)\n",
      "Epoch: [7][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.2770e-01 (1.1380e+00)\n",
      "Epoch: [7][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2064e+00 (1.1385e+00)\n",
      "Epoch: [7][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1855e+00 (1.1379e+00)\n",
      "Epoch: [7][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1533e+00 (1.1383e+00)\n",
      "Epoch: [7][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0381e+00 (1.1387e+00)\n",
      "Epoch: [7][1500/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0454e+00 (1.1382e+00)\n",
      "Epoch: [7][1550/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1877e+00 (1.1385e+00)\n",
      "Epoch: [7][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3753e+00 (1.1387e+00)\n",
      "Epoch: [7][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1104e+00 (1.1394e+00)\n",
      "Epoch: [7][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.9567e-01 (1.1392e+00)\n",
      "Epoch: [7][1750/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3384e+00 (1.1393e+00)\n",
      "Epoch: [7][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0598e+00 (1.1396e+00)\n",
      "Epoch: [7][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1492e+00 (1.1396e+00)\n",
      "Epoch: [7][1900/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1711e+00 (1.1394e+00)\n",
      "Epoch: [7][1950/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3979e+00 (1.1395e+00)\n",
      "Epoch: [7][2000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0877e+00 (1.1402e+00)\n",
      "Epoch: [7][2050/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0869e+00 (1.1411e+00)\n",
      "Epoch: [7][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6739e-01 (1.1409e+00)\n",
      "Epoch: [7][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1015e+00 (1.1412e+00)\n",
      "Epoch: [7][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1487e+00 (1.1409e+00)\n",
      "Epoch: [7][2250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1908e+00 (1.1409e+00)\n",
      "Epoch: [7][2300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0018e+00 (1.1406e+00)\n",
      "Epoch: [7][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1614e+00 (1.1404e+00)\n",
      "Epoch: [7][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3511e+00 (1.1410e+00)\n",
      "Epoch: [7][2450/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3124e+00 (1.1414e+00)\n",
      "Epoch: [7][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1958e+00 (1.1409e+00)\n",
      "Epoch: [7][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1582e+00 (1.1412e+00)\n",
      "Epoch: [7][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1561e+00 (1.1416e+00)\n",
      "Epoch: [7][2650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2102e+00 (1.1421e+00)\n",
      "Epoch: [7][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2237e+00 (1.1420e+00)\n",
      "Epoch: [7][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1530e+00 (1.1422e+00)\n",
      "Epoch: [7][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1884e+00 (1.1424e+00)\n",
      "Epoch: [7][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0157e+00 (1.1427e+00)\n",
      "Epoch: [7][2900/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0305e+00 (1.1430e+00)\n",
      "Epoch: [7][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2333e+00 (1.1432e+00)\n",
      "Epoch: [7][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0797e+00 (1.1434e+00)\n",
      "Epoch: [7][3050/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0432e+00 (1.1434e+00)\n",
      "Epoch: [7][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1147e+00 (1.1438e+00)\n",
      "Epoch: [7][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1237e+00 (1.1440e+00)\n",
      "Epoch: [7][3200/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7806e-01 (1.1442e+00)\n",
      "Epoch: [7][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1736e+00 (1.1445e+00)\n",
      "Epoch: [7][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1486e+00 (1.1449e+00)\n",
      "Epoch: [7][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1849e+00 (1.1450e+00)\n",
      "Epoch: [7][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0514e+00 (1.1450e+00)\n",
      "Epoch: [7][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2026e+00 (1.1456e+00)\n",
      "Epoch: [7][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2087e+00 (1.1452e+00)\n",
      "Epoch: [7][3550/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2907e+00 (1.1456e+00)\n",
      "Epoch: [7][3600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1501e+00 (1.1458e+00)\n",
      "Epoch: [7][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2309e+00 (1.1456e+00)\n",
      "Epoch: [7][3700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0583e+00 (1.1458e+00)\n",
      "Epoch: [7][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3245e+00 (1.1461e+00)\n",
      "Epoch: [7][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2136e+00 (1.1460e+00)\n",
      "Epoch: [7][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0951e+00 (1.1462e+00)\n",
      "Epoch: [7][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0729e+00 (1.1460e+00)\n",
      "Epoch: [7][3950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1456e+00 (1.1463e+00)\n",
      "Epoch: [7][4000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1618e+00 (1.1465e+00)\n",
      "Epoch: [7][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2176e+00 (1.1467e+00)\n",
      "Epoch: [7][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1921e+00 (1.1468e+00)\n",
      "Epoch: [7][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1399e+00 (1.1469e+00)\n",
      "Epoch: [7][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1829e+00 (1.1472e+00)\n",
      "Epoch: [7][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2144e+00 (1.1471e+00)\n",
      "Epoch: [7][4300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2532e+00 (1.1472e+00)\n",
      "Epoch: [7][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0830e+00 (1.1471e+00)\n",
      "Epoch: [7][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1793e+00 (1.1473e+00)\n",
      "Epoch: [7][4450/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1596e+00 (1.1473e+00)\n",
      "Epoch: [7][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2187e+00 (1.1476e+00)\n",
      "Epoch: [7][4550/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1119e+00 (1.1477e+00)\n",
      "Epoch: [7][4600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2969e+00 (1.1477e+00)\n",
      "Epoch: [7][4650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0896e+00 (1.1478e+00)\n",
      "Epoch: [7][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2550e+00 (1.1481e+00)\n",
      "Epoch: [7][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5458e-01 (1.1482e+00)\n",
      "Epoch: [7][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3807e+00 (1.1484e+00)\n",
      "Epoch: [7][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2883e+00 (1.1482e+00)\n",
      "Epoch: [7][4900/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1589e+00 (1.1481e+00)\n",
      "Epoch: [7][4950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3731e-01 (1.1482e+00)\n",
      "Epoch: [7][5000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1754e+00 (1.1482e+00)\n",
      "Test: [  0/196]\tTime  3.354 ( 3.354)\tLoss 7.1679e-01 (7.1679e-01)\tAcc@1  79.69 ( 79.69)\tAcc@5  95.70 ( 95.70)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 5.4727e-01 (9.0674e-01)\tAcc@1  84.77 ( 75.97)\tAcc@5  97.27 ( 93.57)\n",
      "Test: [100/196]\tTime  0.378 ( 0.407)\tLoss 1.4483e+00 (1.0343e+00)\tAcc@1  62.50 ( 73.22)\tAcc@5  87.89 ( 92.20)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.4907e+00 (1.1656e+00)\tAcc@1  70.70 ( 70.90)\tAcc@5  84.38 ( 90.29)\n",
      "epoch 7 1.148222963404673 69.8479995727539 0.006000000000000002 2353956 0.10040116859931866\n",
      "Epoch: [8][   0/5005]\tTime  3.235 ( 3.235)\tData  2.671 ( 2.671)\tLoss 1.0094e+00 (1.0094e+00)\n",
      "Epoch: [8][  50/5005]\tTime  0.559 ( 0.612)\tData  0.000 ( 0.053)\tLoss 1.1515e+00 (1.1202e+00)\n",
      "Epoch: [8][ 100/5005]\tTime  0.560 ( 0.586)\tData  0.000 ( 0.027)\tLoss 1.1940e+00 (1.1163e+00)\n",
      "Epoch: [8][ 150/5005]\tTime  0.560 ( 0.577)\tData  0.000 ( 0.018)\tLoss 1.0851e+00 (1.1119e+00)\n",
      "Epoch: [8][ 200/5005]\tTime  0.559 ( 0.572)\tData  0.000 ( 0.013)\tLoss 1.1544e+00 (1.1166e+00)\n",
      "Epoch: [8][ 250/5005]\tTime  0.559 ( 0.570)\tData  0.000 ( 0.011)\tLoss 1.0927e+00 (1.1218e+00)\n",
      "Epoch: [8][ 300/5005]\tTime  0.559 ( 0.568)\tData  0.000 ( 0.009)\tLoss 1.1083e+00 (1.1203e+00)\n",
      "Epoch: [8][ 350/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 1.0166e+00 (1.1204e+00)\n",
      "Epoch: [8][ 400/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.1252e+00 (1.1178e+00)\n",
      "Epoch: [8][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.1320e+00 (1.1168e+00)\n",
      "Epoch: [8][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.006)\tLoss 1.1799e+00 (1.1165e+00)\n",
      "Epoch: [8][ 550/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.1131e+00 (1.1152e+00)\n",
      "Epoch: [8][ 600/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.1756e+00 (1.1124e+00)\n",
      "Epoch: [8][ 650/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.2256e-01 (1.1146e+00)\n",
      "Epoch: [8][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0656e+00 (1.1148e+00)\n",
      "Epoch: [8][ 750/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1142e+00 (1.1151e+00)\n",
      "Epoch: [8][ 800/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.2863e+00 (1.1155e+00)\n",
      "Epoch: [8][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1706e+00 (1.1169e+00)\n",
      "Epoch: [8][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1817e+00 (1.1168e+00)\n",
      "Epoch: [8][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.6923e-01 (1.1180e+00)\n",
      "Epoch: [8][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0693e+00 (1.1189e+00)\n",
      "Epoch: [8][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1587e+00 (1.1180e+00)\n",
      "Epoch: [8][1100/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1273e+00 (1.1185e+00)\n",
      "Epoch: [8][1150/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2402e+00 (1.1191e+00)\n",
      "Epoch: [8][1200/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1217e+00 (1.1190e+00)\n",
      "Epoch: [8][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1106e+00 (1.1188e+00)\n",
      "Epoch: [8][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1371e+00 (1.1194e+00)\n",
      "Epoch: [8][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0805e+00 (1.1188e+00)\n",
      "Epoch: [8][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1508e+00 (1.1190e+00)\n",
      "Epoch: [8][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1650e+00 (1.1186e+00)\n",
      "Epoch: [8][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1246e+00 (1.1184e+00)\n",
      "Epoch: [8][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.3338e+00 (1.1191e+00)\n",
      "Epoch: [8][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0703e+00 (1.1194e+00)\n",
      "Epoch: [8][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1053e+00 (1.1200e+00)\n",
      "Epoch: [8][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1290e+00 (1.1212e+00)\n",
      "Epoch: [8][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1470e+00 (1.1210e+00)\n",
      "Epoch: [8][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.3906e-01 (1.1212e+00)\n",
      "Epoch: [8][1850/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0245e+00 (1.1217e+00)\n",
      "Epoch: [8][1900/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1745e+00 (1.1220e+00)\n",
      "Epoch: [8][1950/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1218e+00 (1.1225e+00)\n",
      "Epoch: [8][2000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2857e+00 (1.1227e+00)\n",
      "Epoch: [8][2050/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9797e-01 (1.1226e+00)\n",
      "Epoch: [8][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1576e+00 (1.1225e+00)\n",
      "Epoch: [8][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1950e+00 (1.1225e+00)\n",
      "Epoch: [8][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0440e+00 (1.1228e+00)\n",
      "Epoch: [8][2250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0506e+00 (1.1229e+00)\n",
      "Epoch: [8][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2420e+00 (1.1232e+00)\n",
      "Epoch: [8][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1523e+00 (1.1231e+00)\n",
      "Epoch: [8][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1152e+00 (1.1234e+00)\n",
      "Epoch: [8][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1625e+00 (1.1236e+00)\n",
      "Epoch: [8][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2720e+00 (1.1243e+00)\n",
      "Epoch: [8][2550/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0958e+00 (1.1243e+00)\n",
      "Epoch: [8][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3155e+00 (1.1244e+00)\n",
      "Epoch: [8][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9282e-01 (1.1242e+00)\n",
      "Epoch: [8][2700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2492e+00 (1.1244e+00)\n",
      "Epoch: [8][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0810e+00 (1.1250e+00)\n",
      "Epoch: [8][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1204e+00 (1.1252e+00)\n",
      "Epoch: [8][2850/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2746e+00 (1.1253e+00)\n",
      "Epoch: [8][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0640e+00 (1.1254e+00)\n",
      "Epoch: [8][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2118e+00 (1.1260e+00)\n",
      "Epoch: [8][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0393e+00 (1.1262e+00)\n",
      "Epoch: [8][3050/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2842e+00 (1.1266e+00)\n",
      "Epoch: [8][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0203e+00 (1.1268e+00)\n",
      "Epoch: [8][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1973e+00 (1.1268e+00)\n",
      "Epoch: [8][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0931e+00 (1.1267e+00)\n",
      "Epoch: [8][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1558e+00 (1.1269e+00)\n",
      "Epoch: [8][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2433e+00 (1.1267e+00)\n",
      "Epoch: [8][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0519e+00 (1.1270e+00)\n",
      "Epoch: [8][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1696e+00 (1.1270e+00)\n",
      "Epoch: [8][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1446e+00 (1.1273e+00)\n",
      "Epoch: [8][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3353e+00 (1.1274e+00)\n",
      "Epoch: [8][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7338e-01 (1.1275e+00)\n",
      "Epoch: [8][3600/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1296e+00 (1.1276e+00)\n",
      "Epoch: [8][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0947e+00 (1.1277e+00)\n",
      "Epoch: [8][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4805e-01 (1.1279e+00)\n",
      "Epoch: [8][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2921e+00 (1.1279e+00)\n",
      "Epoch: [8][3800/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0378e+00 (1.1277e+00)\n",
      "Epoch: [8][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3030e+00 (1.1286e+00)\n",
      "Epoch: [8][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2911e+00 (1.1287e+00)\n",
      "Epoch: [8][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7612e-01 (1.1288e+00)\n",
      "Epoch: [8][4000/5005]\tTime  0.564 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2527e+00 (1.1289e+00)\n",
      "Epoch: [8][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0065e+00 (1.1291e+00)\n",
      "Epoch: [8][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1385e+00 (1.1291e+00)\n",
      "Epoch: [8][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1404e+00 (1.1293e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [8][4200/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2927e+00 (1.1294e+00)\n",
      "Epoch: [8][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1103e+00 (1.1292e+00)\n",
      "Epoch: [8][4300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1517e+00 (1.1295e+00)\n",
      "Epoch: [8][4350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0783e+00 (1.1295e+00)\n",
      "Epoch: [8][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1611e+00 (1.1296e+00)\n",
      "Epoch: [8][4450/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2193e+00 (1.1299e+00)\n",
      "Epoch: [8][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4819e-01 (1.1298e+00)\n",
      "Epoch: [8][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1265e+00 (1.1301e+00)\n",
      "Epoch: [8][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2026e+00 (1.1302e+00)\n",
      "Epoch: [8][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0718e+00 (1.1302e+00)\n",
      "Epoch: [8][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1581e+00 (1.1305e+00)\n",
      "Epoch: [8][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3397e+00 (1.1307e+00)\n",
      "Epoch: [8][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2708e+00 (1.1306e+00)\n",
      "Epoch: [8][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1561e+00 (1.1307e+00)\n",
      "Epoch: [8][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1089e+00 (1.1307e+00)\n",
      "Epoch: [8][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1361e+00 (1.1307e+00)\n",
      "Epoch: [8][5000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1767e+00 (1.1308e+00)\n",
      "Test: [  0/196]\tTime  3.327 ( 3.327)\tLoss 6.1394e-01 (6.1394e-01)\tAcc@1  83.20 ( 83.20)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 5.3838e-01 (8.7462e-01)\tAcc@1  86.72 ( 76.66)\tAcc@5  97.27 ( 93.93)\n",
      "Test: [100/196]\tTime  0.378 ( 0.406)\tLoss 1.4670e+00 (1.0150e+00)\tAcc@1  60.16 ( 73.57)\tAcc@5  87.11 ( 92.32)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.4665e+00 (1.1484e+00)\tAcc@1  68.36 ( 71.05)\tAcc@5  85.94 ( 90.48)\n",
      "epoch 8 1.130835646229324 69.92599487304688 0.005500000000000002 2353956 0.10040116859931866\n",
      "Epoch: [9][   0/5005]\tTime  3.235 ( 3.235)\tData  2.675 ( 2.675)\tLoss 9.4133e-01 (9.4133e-01)\n",
      "Epoch: [9][  50/5005]\tTime  0.561 ( 0.612)\tData  0.000 ( 0.053)\tLoss 1.0907e+00 (1.0920e+00)\n",
      "Epoch: [9][ 100/5005]\tTime  0.560 ( 0.586)\tData  0.000 ( 0.027)\tLoss 1.0282e+00 (1.0875e+00)\n",
      "Epoch: [9][ 150/5005]\tTime  0.560 ( 0.577)\tData  0.000 ( 0.018)\tLoss 9.3943e-01 (1.0874e+00)\n",
      "Epoch: [9][ 200/5005]\tTime  0.559 ( 0.573)\tData  0.000 ( 0.014)\tLoss 1.1104e+00 (1.0828e+00)\n",
      "Epoch: [9][ 250/5005]\tTime  0.559 ( 0.570)\tData  0.000 ( 0.011)\tLoss 1.0649e+00 (1.0839e+00)\n",
      "Epoch: [9][ 300/5005]\tTime  0.560 ( 0.568)\tData  0.000 ( 0.009)\tLoss 1.2318e+00 (1.0888e+00)\n",
      "Epoch: [9][ 350/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 9.4458e-01 (1.0917e+00)\n",
      "Epoch: [9][ 400/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 9.8589e-01 (1.0936e+00)\n",
      "Epoch: [9][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.0688e+00 (1.0963e+00)\n",
      "Epoch: [9][ 500/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.2241e+00 (1.0970e+00)\n",
      "Epoch: [9][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0406e+00 (1.0977e+00)\n",
      "Epoch: [9][ 600/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.6446e-01 (1.0993e+00)\n",
      "Epoch: [9][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0904e+00 (1.0990e+00)\n",
      "Epoch: [9][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1949e+00 (1.0996e+00)\n",
      "Epoch: [9][ 750/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1552e+00 (1.1013e+00)\n",
      "Epoch: [9][ 800/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1723e+00 (1.1030e+00)\n",
      "Epoch: [9][ 850/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2396e+00 (1.1029e+00)\n",
      "Epoch: [9][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2488e+00 (1.1036e+00)\n",
      "Epoch: [9][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2768e+00 (1.1030e+00)\n",
      "Epoch: [9][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1839e+00 (1.1031e+00)\n",
      "Epoch: [9][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2287e+00 (1.1029e+00)\n",
      "Epoch: [9][1100/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2011e+00 (1.1040e+00)\n",
      "Epoch: [9][1150/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1695e+00 (1.1035e+00)\n",
      "Epoch: [9][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2349e+00 (1.1032e+00)\n",
      "Epoch: [9][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1502e+00 (1.1034e+00)\n",
      "Epoch: [9][1300/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0571e+00 (1.1040e+00)\n",
      "Epoch: [9][1350/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.9950e-01 (1.1044e+00)\n",
      "Epoch: [9][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5635e-01 (1.1046e+00)\n",
      "Epoch: [9][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0843e+00 (1.1047e+00)\n",
      "Epoch: [9][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0006e+00 (1.1042e+00)\n",
      "Epoch: [9][1550/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1088e+00 (1.1041e+00)\n",
      "Epoch: [9][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1267e+00 (1.1041e+00)\n",
      "Epoch: [9][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0772e+00 (1.1039e+00)\n",
      "Epoch: [9][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0049e+00 (1.1038e+00)\n",
      "Epoch: [9][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0841e+00 (1.1040e+00)\n",
      "Epoch: [9][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1367e+00 (1.1040e+00)\n",
      "Epoch: [9][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1870e+00 (1.1039e+00)\n",
      "Epoch: [9][1900/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0680e+00 (1.1043e+00)\n",
      "Epoch: [9][1950/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1915e-01 (1.1041e+00)\n",
      "Epoch: [9][2000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1608e+00 (1.1041e+00)\n",
      "Epoch: [9][2050/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 1.1980e+00 (1.1042e+00)\n",
      "Epoch: [9][2100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 1.1944e+00 (1.1047e+00)\n",
      "Epoch: [9][2150/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2177e+00 (1.1056e+00)\n",
      "Epoch: [9][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1787e+00 (1.1061e+00)\n",
      "Epoch: [9][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5926e-01 (1.1063e+00)\n",
      "Epoch: [9][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0850e+00 (1.1062e+00)\n",
      "Epoch: [9][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3218e+00 (1.1064e+00)\n",
      "Epoch: [9][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0290e+00 (1.1067e+00)\n",
      "Epoch: [9][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0800e+00 (1.1070e+00)\n",
      "Epoch: [9][2500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1596e+00 (1.1068e+00)\n",
      "Epoch: [9][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0883e+00 (1.1076e+00)\n",
      "Epoch: [9][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4709e-01 (1.1079e+00)\n",
      "Epoch: [9][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2052e+00 (1.1081e+00)\n",
      "Epoch: [9][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3665e+00 (1.1080e+00)\n",
      "Epoch: [9][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0364e+00 (1.1080e+00)\n",
      "Epoch: [9][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0922e+00 (1.1082e+00)\n",
      "Epoch: [9][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0446e+00 (1.1084e+00)\n",
      "Epoch: [9][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1397e+00 (1.1089e+00)\n",
      "Epoch: [9][2950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1639e+00 (1.1094e+00)\n",
      "Epoch: [9][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1784e+00 (1.1096e+00)\n",
      "Epoch: [9][3050/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0825e+00 (1.1103e+00)\n",
      "Epoch: [9][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1309e+00 (1.1103e+00)\n",
      "Epoch: [9][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1549e+00 (1.1106e+00)\n",
      "Epoch: [9][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0608e+00 (1.1106e+00)\n",
      "Epoch: [9][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8541e-01 (1.1104e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [9][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0247e+00 (1.1101e+00)\n",
      "Epoch: [9][3350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2064e+00 (1.1104e+00)\n",
      "Epoch: [9][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0600e+00 (1.1101e+00)\n",
      "Epoch: [9][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6226e-01 (1.1101e+00)\n",
      "Epoch: [9][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1552e+00 (1.1106e+00)\n",
      "Epoch: [9][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0851e+00 (1.1107e+00)\n",
      "Epoch: [9][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1079e+00 (1.1111e+00)\n",
      "Epoch: [9][3650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1230e+00 (1.1115e+00)\n",
      "Epoch: [9][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0819e+00 (1.1113e+00)\n",
      "Epoch: [9][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0469e-01 (1.1111e+00)\n",
      "Epoch: [9][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1223e+00 (1.1114e+00)\n",
      "Epoch: [9][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0670e+00 (1.1115e+00)\n",
      "Epoch: [9][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0148e+00 (1.1119e+00)\n",
      "Epoch: [9][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1189e+00 (1.1120e+00)\n",
      "Epoch: [9][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0623e+00 (1.1118e+00)\n",
      "Epoch: [9][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1387e+00 (1.1121e+00)\n",
      "Epoch: [9][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3029e+00 (1.1124e+00)\n",
      "Epoch: [9][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1215e+00 (1.1127e+00)\n",
      "Epoch: [9][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1469e+00 (1.1128e+00)\n",
      "Epoch: [9][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0015e+00 (1.1128e+00)\n",
      "Epoch: [9][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2299e+00 (1.1128e+00)\n",
      "Epoch: [9][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1049e+00 (1.1132e+00)\n",
      "Epoch: [9][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1436e+00 (1.1133e+00)\n",
      "Epoch: [9][4450/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2780e-01 (1.1136e+00)\n",
      "Epoch: [9][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1216e+00 (1.1139e+00)\n",
      "Epoch: [9][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0018e+00 (1.1143e+00)\n",
      "Epoch: [9][4600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1525e+00 (1.1145e+00)\n",
      "Epoch: [9][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2432e+00 (1.1147e+00)\n",
      "Epoch: [9][4700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0290e+00 (1.1148e+00)\n",
      "Epoch: [9][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1152e+00 (1.1148e+00)\n",
      "Epoch: [9][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2626e+00 (1.1150e+00)\n",
      "Epoch: [9][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0977e+00 (1.1152e+00)\n",
      "Epoch: [9][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6972e-01 (1.1153e+00)\n",
      "Epoch: [9][4950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2128e+00 (1.1155e+00)\n",
      "Epoch: [9][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2230e+00 (1.1157e+00)\n",
      "Test: [  0/196]\tTime  3.531 ( 3.531)\tLoss 6.9551e-01 (6.9551e-01)\tAcc@1  79.69 ( 79.69)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.378 ( 0.439)\tLoss 6.9493e-01 (8.7893e-01)\tAcc@1  80.47 ( 76.33)\tAcc@5  95.70 ( 93.86)\n",
      "Test: [100/196]\tTime  0.378 ( 0.409)\tLoss 1.5345e+00 (1.0115e+00)\tAcc@1  59.38 ( 73.65)\tAcc@5  88.67 ( 92.43)\n",
      "Test: [150/196]\tTime  0.378 ( 0.398)\tLoss 1.1432e+00 (1.1387e+00)\tAcc@1  75.39 ( 71.33)\tAcc@5  89.45 ( 90.62)\n",
      "epoch 9 1.1157401533028581 70.45199584960938 0.005000000000000002 2353956 0.10040116859931866\n",
      "Epoch: [10][   0/5005]\tTime  3.086 ( 3.086)\tData  2.520 ( 2.520)\tLoss 1.2005e+00 (1.2005e+00)\n",
      "Epoch: [10][  50/5005]\tTime  0.559 ( 0.609)\tData  0.000 ( 0.050)\tLoss 9.4454e-01 (1.0786e+00)\n",
      "Epoch: [10][ 100/5005]\tTime  0.559 ( 0.584)\tData  0.000 ( 0.025)\tLoss 1.2860e+00 (1.0719e+00)\n",
      "Epoch: [10][ 150/5005]\tTime  0.559 ( 0.576)\tData  0.000 ( 0.017)\tLoss 1.1636e+00 (1.0773e+00)\n",
      "Epoch: [10][ 200/5005]\tTime  0.559 ( 0.572)\tData  0.000 ( 0.013)\tLoss 1.1421e+00 (1.0763e+00)\n",
      "Epoch: [10][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.010)\tLoss 1.0867e+00 (1.0770e+00)\n",
      "Epoch: [10][ 300/5005]\tTime  0.559 ( 0.568)\tData  0.000 ( 0.009)\tLoss 1.2097e+00 (1.0795e+00)\n",
      "Epoch: [10][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 9.8081e-01 (1.0823e+00)\n",
      "Epoch: [10][ 400/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 9.9882e-01 (1.0849e+00)\n",
      "Epoch: [10][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.0860e+00 (1.0890e+00)\n",
      "Epoch: [10][ 500/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.1914e+00 (1.0867e+00)\n",
      "Epoch: [10][ 550/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0677e+00 (1.0859e+00)\n",
      "Epoch: [10][ 600/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.2299e+00 (1.0862e+00)\n",
      "Epoch: [10][ 650/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0655e+00 (1.0848e+00)\n",
      "Epoch: [10][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.1739e+00 (1.0839e+00)\n",
      "Epoch: [10][ 750/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.4728e-01 (1.0856e+00)\n",
      "Epoch: [10][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0265e+00 (1.0854e+00)\n",
      "Epoch: [10][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1140e+00 (1.0863e+00)\n",
      "Epoch: [10][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.6619e-01 (1.0877e+00)\n",
      "Epoch: [10][ 950/5005]\tTime  0.558 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2250e+00 (1.0864e+00)\n",
      "Epoch: [10][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2623e+00 (1.0875e+00)\n",
      "Epoch: [10][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0942e+00 (1.0881e+00)\n",
      "Epoch: [10][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0354e+00 (1.0881e+00)\n",
      "Epoch: [10][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2482e+00 (1.0881e+00)\n",
      "Epoch: [10][1200/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2121e+00 (1.0892e+00)\n",
      "Epoch: [10][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0724e+00 (1.0879e+00)\n",
      "Epoch: [10][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1340e+00 (1.0887e+00)\n",
      "Epoch: [10][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0967e+00 (1.0902e+00)\n",
      "Epoch: [10][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.4803e-01 (1.0900e+00)\n",
      "Epoch: [10][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0402e+00 (1.0885e+00)\n",
      "Epoch: [10][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2430e+00 (1.0889e+00)\n",
      "Epoch: [10][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1204e+00 (1.0891e+00)\n",
      "Epoch: [10][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0032e+00 (1.0885e+00)\n",
      "Epoch: [10][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.5599e-01 (1.0880e+00)\n",
      "Epoch: [10][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0152e+00 (1.0877e+00)\n",
      "Epoch: [10][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0474e+00 (1.0887e+00)\n",
      "Epoch: [10][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1688e-01 (1.0883e+00)\n",
      "Epoch: [10][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1125e+00 (1.0880e+00)\n",
      "Epoch: [10][1900/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1689e+00 (1.0889e+00)\n",
      "Epoch: [10][1950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0866e-01 (1.0888e+00)\n",
      "Epoch: [10][2000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2320e+00 (1.0892e+00)\n",
      "Epoch: [10][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2922e-01 (1.0899e+00)\n",
      "Epoch: [10][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1628e+00 (1.0901e+00)\n",
      "Epoch: [10][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0575e+00 (1.0905e+00)\n",
      "Epoch: [10][2200/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1491e+00 (1.0908e+00)\n",
      "Epoch: [10][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0721e+00 (1.0914e+00)\n",
      "Epoch: [10][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2062e+00 (1.0923e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [10][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1255e+00 (1.0923e+00)\n",
      "Epoch: [10][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6689e-01 (1.0927e+00)\n",
      "Epoch: [10][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9029e-01 (1.0929e+00)\n",
      "Epoch: [10][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0930e+00 (1.0928e+00)\n",
      "Epoch: [10][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1045e+00 (1.0929e+00)\n",
      "Epoch: [10][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1057e+00 (1.0937e+00)\n",
      "Epoch: [10][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5363e-01 (1.0935e+00)\n",
      "Epoch: [10][2700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1154e+00 (1.0937e+00)\n",
      "Epoch: [10][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1648e+00 (1.0936e+00)\n",
      "Epoch: [10][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2588e+00 (1.0942e+00)\n",
      "Epoch: [10][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0529e+00 (1.0944e+00)\n",
      "Epoch: [10][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8949e-01 (1.0945e+00)\n",
      "Epoch: [10][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1422e+00 (1.0944e+00)\n",
      "Epoch: [10][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1781e+00 (1.0946e+00)\n",
      "Epoch: [10][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1304e+00 (1.0947e+00)\n",
      "Epoch: [10][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1599e+00 (1.0946e+00)\n",
      "Epoch: [10][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1986e+00 (1.0943e+00)\n",
      "Epoch: [10][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1549e+00 (1.0939e+00)\n",
      "Epoch: [10][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8969e-01 (1.0939e+00)\n",
      "Epoch: [10][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6958e-01 (1.0937e+00)\n",
      "Epoch: [10][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1828e+00 (1.0942e+00)\n",
      "Epoch: [10][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4521e-01 (1.0940e+00)\n",
      "Epoch: [10][3450/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2069e+00 (1.0942e+00)\n",
      "Epoch: [10][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8965e-01 (1.0941e+00)\n",
      "Epoch: [10][3550/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1361e+00 (1.0943e+00)\n",
      "Epoch: [10][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2101e+00 (1.0944e+00)\n",
      "Epoch: [10][3650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1827e+00 (1.0944e+00)\n",
      "Epoch: [10][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1443e+00 (1.0944e+00)\n",
      "Epoch: [10][3750/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1022e+00 (1.0945e+00)\n",
      "Epoch: [10][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1313e+00 (1.0947e+00)\n",
      "Epoch: [10][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1987e+00 (1.0950e+00)\n",
      "Epoch: [10][3900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0865e+00 (1.0957e+00)\n",
      "Epoch: [10][3950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1301e+00 (1.0956e+00)\n",
      "Epoch: [10][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1357e+00 (1.0957e+00)\n",
      "Epoch: [10][4050/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1153e+00 (1.0958e+00)\n",
      "Epoch: [10][4100/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1542e+00 (1.0960e+00)\n",
      "Epoch: [10][4150/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1210e+00 (1.0962e+00)\n",
      "Epoch: [10][4200/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1478e+00 (1.0962e+00)\n",
      "Epoch: [10][4250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2683e+00 (1.0962e+00)\n",
      "Epoch: [10][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0541e+00 (1.0964e+00)\n",
      "Epoch: [10][4350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8218e-01 (1.0962e+00)\n",
      "Epoch: [10][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1764e-01 (1.0965e+00)\n",
      "Epoch: [10][4450/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1264e+00 (1.0966e+00)\n",
      "Epoch: [10][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4716e-01 (1.0967e+00)\n",
      "Epoch: [10][4550/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1026e+00 (1.0969e+00)\n",
      "Epoch: [10][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1088e+00 (1.0970e+00)\n",
      "Epoch: [10][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1748e+00 (1.0969e+00)\n",
      "Epoch: [10][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0909e+00 (1.0971e+00)\n",
      "Epoch: [10][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7366e-01 (1.0971e+00)\n",
      "Epoch: [10][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0341e+00 (1.0971e+00)\n",
      "Epoch: [10][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3619e-01 (1.0973e+00)\n",
      "Epoch: [10][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2036e+00 (1.0972e+00)\n",
      "Epoch: [10][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1584e+00 (1.0973e+00)\n",
      "Epoch: [10][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2321e+00 (1.0973e+00)\n",
      "Test: [  0/196]\tTime  3.422 ( 3.422)\tLoss 6.5838e-01 (6.5838e-01)\tAcc@1  79.69 ( 79.69)\tAcc@5  96.88 ( 96.88)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.437)\tLoss 6.6049e-01 (8.8985e-01)\tAcc@1  83.20 ( 76.21)\tAcc@5  96.48 ( 93.70)\n",
      "Test: [100/196]\tTime  0.377 ( 0.407)\tLoss 1.8103e+00 (1.0314e+00)\tAcc@1  53.52 ( 73.09)\tAcc@5  80.86 ( 92.03)\n",
      "Test: [150/196]\tTime  0.378 ( 0.398)\tLoss 1.3276e+00 (1.1635e+00)\tAcc@1  69.53 ( 70.66)\tAcc@5  87.50 ( 90.20)\n",
      "epoch 10 1.0974175754137507 69.83599853515625 0.004500000000000001 2353956 0.10040116859931866\n",
      "Epoch: [11][   0/5005]\tTime  2.865 ( 2.865)\tData  2.303 ( 2.303)\tLoss 1.0460e+00 (1.0460e+00)\n",
      "Epoch: [11][  50/5005]\tTime  0.560 ( 0.604)\tData  0.000 ( 0.045)\tLoss 1.0735e+00 (1.0750e+00)\n",
      "Epoch: [11][ 100/5005]\tTime  0.559 ( 0.582)\tData  0.000 ( 0.023)\tLoss 1.0985e+00 (1.0755e+00)\n",
      "Epoch: [11][ 150/5005]\tTime  0.560 ( 0.574)\tData  0.000 ( 0.015)\tLoss 1.0386e+00 (1.0732e+00)\n",
      "Epoch: [11][ 200/5005]\tTime  0.559 ( 0.571)\tData  0.000 ( 0.012)\tLoss 1.0845e+00 (1.0691e+00)\n",
      "Epoch: [11][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.009)\tLoss 1.1991e+00 (1.0725e+00)\n",
      "Epoch: [11][ 300/5005]\tTime  0.559 ( 0.568)\tData  0.000 ( 0.008)\tLoss 1.0865e+00 (1.0726e+00)\n",
      "Epoch: [11][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.0834e+00 (1.0728e+00)\n",
      "Epoch: [11][ 400/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.006)\tLoss 1.0684e+00 (1.0726e+00)\n",
      "Epoch: [11][ 450/5005]\tTime  0.560 ( 0.565)\tData  0.000 ( 0.005)\tLoss 1.1437e+00 (1.0724e+00)\n",
      "Epoch: [11][ 500/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0482e+00 (1.0717e+00)\n",
      "Epoch: [11][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.004)\tLoss 1.1057e+00 (1.0718e+00)\n",
      "Epoch: [11][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0501e+00 (1.0711e+00)\n",
      "Epoch: [11][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.1768e-01 (1.0698e+00)\n",
      "Epoch: [11][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.003)\tLoss 1.0811e+00 (1.0703e+00)\n",
      "Epoch: [11][ 750/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.003)\tLoss 1.2704e+00 (1.0709e+00)\n",
      "Epoch: [11][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.3480e-01 (1.0710e+00)\n",
      "Epoch: [11][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2517e+00 (1.0723e+00)\n",
      "Epoch: [11][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0919e+00 (1.0730e+00)\n",
      "Epoch: [11][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0525e+00 (1.0714e+00)\n",
      "Epoch: [11][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.002)\tLoss 1.0272e+00 (1.0711e+00)\n",
      "Epoch: [11][1050/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.002)\tLoss 1.1554e+00 (1.0704e+00)\n",
      "Epoch: [11][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.9003e-01 (1.0697e+00)\n",
      "Epoch: [11][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.0162e-01 (1.0697e+00)\n",
      "Epoch: [11][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0997e+00 (1.0687e+00)\n",
      "Epoch: [11][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0606e+00 (1.0670e+00)\n",
      "Epoch: [11][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2793e+00 (1.0680e+00)\n",
      "Epoch: [11][1350/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2023e+00 (1.0690e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [11][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0428e+00 (1.0693e+00)\n",
      "Epoch: [11][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8056e-01 (1.0697e+00)\n",
      "Epoch: [11][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0078e+00 (1.0701e+00)\n",
      "Epoch: [11][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0211e+00 (1.0704e+00)\n",
      "Epoch: [11][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8310e-01 (1.0703e+00)\n",
      "Epoch: [11][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0105e+00 (1.0705e+00)\n",
      "Epoch: [11][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0085e+00 (1.0706e+00)\n",
      "Epoch: [11][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 1.0363e+00 (1.0704e+00)\n",
      "Epoch: [11][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 1.2299e+00 (1.0704e+00)\n",
      "Epoch: [11][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 1.1624e+00 (1.0706e+00)\n",
      "Epoch: [11][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0649e+00 (1.0708e+00)\n",
      "Epoch: [11][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1445e+00 (1.0713e+00)\n",
      "Epoch: [11][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0304e+00 (1.0716e+00)\n",
      "Epoch: [11][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1081e+00 (1.0723e+00)\n",
      "Epoch: [11][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1463e+00 (1.0726e+00)\n",
      "Epoch: [11][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9544e-01 (1.0726e+00)\n",
      "Epoch: [11][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1354e+00 (1.0724e+00)\n",
      "Epoch: [11][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1089e+00 (1.0724e+00)\n",
      "Epoch: [11][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0102e+00 (1.0723e+00)\n",
      "Epoch: [11][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4192e-01 (1.0728e+00)\n",
      "Epoch: [11][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0189e+00 (1.0732e+00)\n",
      "Epoch: [11][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0463e+00 (1.0735e+00)\n",
      "Epoch: [11][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2177e+00 (1.0738e+00)\n",
      "Epoch: [11][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0574e+00 (1.0734e+00)\n",
      "Epoch: [11][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1268e+00 (1.0740e+00)\n",
      "Epoch: [11][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9852e-01 (1.0737e+00)\n",
      "Epoch: [11][2700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0669e+00 (1.0737e+00)\n",
      "Epoch: [11][2750/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0256e+00 (1.0740e+00)\n",
      "Epoch: [11][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9358e-01 (1.0742e+00)\n",
      "Epoch: [11][2850/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0500e+00 (1.0746e+00)\n",
      "Epoch: [11][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1291e+00 (1.0752e+00)\n",
      "Epoch: [11][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0755e+00 (1.0749e+00)\n",
      "Epoch: [11][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6189e-01 (1.0748e+00)\n",
      "Epoch: [11][3050/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5429e-01 (1.0745e+00)\n",
      "Epoch: [11][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0765e+00 (1.0743e+00)\n",
      "Epoch: [11][3150/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1070e+00 (1.0747e+00)\n",
      "Epoch: [11][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2952e+00 (1.0745e+00)\n",
      "Epoch: [11][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0408e+00 (1.0746e+00)\n",
      "Epoch: [11][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2142e+00 (1.0749e+00)\n",
      "Epoch: [11][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0036e+00 (1.0749e+00)\n",
      "Epoch: [11][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1868e+00 (1.0756e+00)\n",
      "Epoch: [11][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2038e+00 (1.0752e+00)\n",
      "Epoch: [11][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9625e-01 (1.0752e+00)\n",
      "Epoch: [11][3550/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2644e+00 (1.0753e+00)\n",
      "Epoch: [11][3600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1389e+00 (1.0758e+00)\n",
      "Epoch: [11][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1822e+00 (1.0757e+00)\n",
      "Epoch: [11][3700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0900e+00 (1.0758e+00)\n",
      "Epoch: [11][3750/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4637e-01 (1.0756e+00)\n",
      "Epoch: [11][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1112e+00 (1.0758e+00)\n",
      "Epoch: [11][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2392e+00 (1.0760e+00)\n",
      "Epoch: [11][3900/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0194e+00 (1.0761e+00)\n",
      "Epoch: [11][3950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0847e+00 (1.0761e+00)\n",
      "Epoch: [11][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2161e-01 (1.0765e+00)\n",
      "Epoch: [11][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2187e+00 (1.0762e+00)\n",
      "Epoch: [11][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2530e+00 (1.0766e+00)\n",
      "Epoch: [11][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3695e+00 (1.0770e+00)\n",
      "Epoch: [11][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1600e+00 (1.0771e+00)\n",
      "Epoch: [11][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0771e+00 (1.0772e+00)\n",
      "Epoch: [11][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1198e+00 (1.0773e+00)\n",
      "Epoch: [11][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0727e+00 (1.0775e+00)\n",
      "Epoch: [11][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9787e-01 (1.0778e+00)\n",
      "Epoch: [11][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1156e+00 (1.0781e+00)\n",
      "Epoch: [11][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.3946e+00 (1.0785e+00)\n",
      "Epoch: [11][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1494e+00 (1.0785e+00)\n",
      "Epoch: [11][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0528e+00 (1.0787e+00)\n",
      "Epoch: [11][4650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0222e+00 (1.0785e+00)\n",
      "Epoch: [11][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8270e-01 (1.0785e+00)\n",
      "Epoch: [11][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2466e+00 (1.0785e+00)\n",
      "Epoch: [11][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0351e+00 (1.0785e+00)\n",
      "Epoch: [11][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1606e+00 (1.0789e+00)\n",
      "Epoch: [11][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1850e-01 (1.0788e+00)\n",
      "Epoch: [11][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1352e+00 (1.0790e+00)\n",
      "Epoch: [11][5000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1320e+00 (1.0792e+00)\n",
      "Test: [  0/196]\tTime  3.323 ( 3.323)\tLoss 7.3806e-01 (7.3806e-01)\tAcc@1  79.69 ( 79.69)\tAcc@5  95.31 ( 95.31)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 5.9899e-01 (8.9172e-01)\tAcc@1  85.55 ( 76.39)\tAcc@5  96.48 ( 93.53)\n",
      "Test: [100/196]\tTime  0.378 ( 0.407)\tLoss 1.7299e+00 (1.0334e+00)\tAcc@1  55.47 ( 73.34)\tAcc@5  83.20 ( 91.94)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.3269e+00 (1.1584e+00)\tAcc@1  73.44 ( 70.89)\tAcc@5  84.77 ( 90.23)\n",
      "epoch 11 1.0792749469975476 69.85199737548828 0.004000000000000002 2353956 0.10040116859931866\n",
      "Epoch: [12][   0/5005]\tTime  3.285 ( 3.285)\tData  2.725 ( 2.725)\tLoss 8.7726e-01 (8.7726e-01)\n",
      "Epoch: [12][  50/5005]\tTime  0.559 ( 0.613)\tData  0.000 ( 0.054)\tLoss 8.5271e-01 (1.0475e+00)\n",
      "Epoch: [12][ 100/5005]\tTime  0.559 ( 0.586)\tData  0.000 ( 0.027)\tLoss 9.5503e-01 (1.0509e+00)\n",
      "Epoch: [12][ 150/5005]\tTime  0.556 ( 0.577)\tData  0.000 ( 0.018)\tLoss 1.1174e+00 (1.0510e+00)\n",
      "Epoch: [12][ 200/5005]\tTime  0.559 ( 0.573)\tData  0.000 ( 0.014)\tLoss 1.0435e+00 (1.0496e+00)\n",
      "Epoch: [12][ 250/5005]\tTime  0.559 ( 0.570)\tData  0.000 ( 0.011)\tLoss 9.9945e-01 (1.0457e+00)\n",
      "Epoch: [12][ 300/5005]\tTime  0.559 ( 0.568)\tData  0.000 ( 0.009)\tLoss 9.5626e-01 (1.0459e+00)\n",
      "Epoch: [12][ 350/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 9.2075e-01 (1.0457e+00)\n",
      "Epoch: [12][ 400/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 9.4386e-01 (1.0491e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [12][ 450/5005]\tTime  0.560 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.0309e+00 (1.0514e+00)\n",
      "Epoch: [12][ 500/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.1085e+00 (1.0504e+00)\n",
      "Epoch: [12][ 550/5005]\tTime  0.558 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0110e+00 (1.0510e+00)\n",
      "Epoch: [12][ 600/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0070e+00 (1.0515e+00)\n",
      "Epoch: [12][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.8414e-01 (1.0506e+00)\n",
      "Epoch: [12][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.7527e-01 (1.0508e+00)\n",
      "Epoch: [12][ 750/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0330e+00 (1.0497e+00)\n",
      "Epoch: [12][ 800/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.7572e-01 (1.0507e+00)\n",
      "Epoch: [12][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2793e+00 (1.0505e+00)\n",
      "Epoch: [12][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0758e+00 (1.0496e+00)\n",
      "Epoch: [12][ 950/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1198e+00 (1.0503e+00)\n",
      "Epoch: [12][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1037e+00 (1.0511e+00)\n",
      "Epoch: [12][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.3075e+00 (1.0521e+00)\n",
      "Epoch: [12][1100/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0439e+00 (1.0523e+00)\n",
      "Epoch: [12][1150/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0591e+00 (1.0521e+00)\n",
      "Epoch: [12][1200/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.002)\tLoss 9.2999e-01 (1.0529e+00)\n",
      "Epoch: [12][1250/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1107e+00 (1.0518e+00)\n",
      "Epoch: [12][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.2279e-01 (1.0518e+00)\n",
      "Epoch: [12][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0829e+00 (1.0521e+00)\n",
      "Epoch: [12][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1408e-01 (1.0526e+00)\n",
      "Epoch: [12][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1642e+00 (1.0527e+00)\n",
      "Epoch: [12][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0836e+00 (1.0531e+00)\n",
      "Epoch: [12][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8003e-01 (1.0538e+00)\n",
      "Epoch: [12][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.9611e-01 (1.0538e+00)\n",
      "Epoch: [12][1650/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1800e+00 (1.0533e+00)\n",
      "Epoch: [12][1700/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1103e+00 (1.0534e+00)\n",
      "Epoch: [12][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0574e+00 (1.0529e+00)\n",
      "Epoch: [12][1800/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1878e+00 (1.0535e+00)\n",
      "Epoch: [12][1850/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1557e+00 (1.0543e+00)\n",
      "Epoch: [12][1900/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1001e+00 (1.0544e+00)\n",
      "Epoch: [12][1950/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.6756e-01 (1.0546e+00)\n",
      "Epoch: [12][2000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0826e+00 (1.0546e+00)\n",
      "Epoch: [12][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0315e+00 (1.0545e+00)\n",
      "Epoch: [12][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1732e+00 (1.0545e+00)\n",
      "Epoch: [12][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2358e-01 (1.0550e+00)\n",
      "Epoch: [12][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0614e+00 (1.0549e+00)\n",
      "Epoch: [12][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0944e+00 (1.0550e+00)\n",
      "Epoch: [12][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9011e-01 (1.0550e+00)\n",
      "Epoch: [12][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1347e+00 (1.0550e+00)\n",
      "Epoch: [12][2400/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0060e+00 (1.0550e+00)\n",
      "Epoch: [12][2450/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5491e-01 (1.0551e+00)\n",
      "Epoch: [12][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4105e-01 (1.0553e+00)\n",
      "Epoch: [12][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1139e+00 (1.0556e+00)\n",
      "Epoch: [12][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8452e-01 (1.0558e+00)\n",
      "Epoch: [12][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0154e+00 (1.0562e+00)\n",
      "Epoch: [12][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1007e+00 (1.0563e+00)\n",
      "Epoch: [12][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0602e+00 (1.0565e+00)\n",
      "Epoch: [12][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1411e+00 (1.0567e+00)\n",
      "Epoch: [12][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0369e+00 (1.0566e+00)\n",
      "Epoch: [12][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1242e+00 (1.0569e+00)\n",
      "Epoch: [12][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1406e+00 (1.0574e+00)\n",
      "Epoch: [12][3000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0444e+00 (1.0575e+00)\n",
      "Epoch: [12][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0029e+00 (1.0581e+00)\n",
      "Epoch: [12][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0276e+00 (1.0584e+00)\n",
      "Epoch: [12][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9031e-01 (1.0583e+00)\n",
      "Epoch: [12][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0397e+00 (1.0586e+00)\n",
      "Epoch: [12][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0834e+00 (1.0585e+00)\n",
      "Epoch: [12][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6739e-01 (1.0588e+00)\n",
      "Epoch: [12][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1819e+00 (1.0587e+00)\n",
      "Epoch: [12][3400/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0765e+00 (1.0587e+00)\n",
      "Epoch: [12][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0886e+00 (1.0588e+00)\n",
      "Epoch: [12][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0275e+00 (1.0587e+00)\n",
      "Epoch: [12][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0003e+00 (1.0590e+00)\n",
      "Epoch: [12][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0998e+00 (1.0593e+00)\n",
      "Epoch: [12][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1935e+00 (1.0593e+00)\n",
      "Epoch: [12][3700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0443e+00 (1.0596e+00)\n",
      "Epoch: [12][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1409e+00 (1.0595e+00)\n",
      "Epoch: [12][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2712e+00 (1.0598e+00)\n",
      "Epoch: [12][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8960e-01 (1.0594e+00)\n",
      "Epoch: [12][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1238e+00 (1.0596e+00)\n",
      "Epoch: [12][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3889e-01 (1.0597e+00)\n",
      "Epoch: [12][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9708e-01 (1.0601e+00)\n",
      "Epoch: [12][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0052e-01 (1.0604e+00)\n",
      "Epoch: [12][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2123e+00 (1.0606e+00)\n",
      "Epoch: [12][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1773e+00 (1.0606e+00)\n",
      "Epoch: [12][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1145e+00 (1.0607e+00)\n",
      "Epoch: [12][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0915e+00 (1.0608e+00)\n",
      "Epoch: [12][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2806e+00 (1.0610e+00)\n",
      "Epoch: [12][4350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1005e+00 (1.0611e+00)\n",
      "Epoch: [12][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1153e+00 (1.0611e+00)\n",
      "Epoch: [12][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0232e-01 (1.0610e+00)\n",
      "Epoch: [12][4500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7909e-01 (1.0609e+00)\n",
      "Epoch: [12][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0075e+00 (1.0609e+00)\n",
      "Epoch: [12][4600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0573e+00 (1.0607e+00)\n",
      "Epoch: [12][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0430e+00 (1.0608e+00)\n",
      "Epoch: [12][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5427e-01 (1.0606e+00)\n",
      "Epoch: [12][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2492e+00 (1.0607e+00)\n",
      "Epoch: [12][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0129e+00 (1.0609e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [12][4850/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0023e+00 (1.0609e+00)\n",
      "Epoch: [12][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0867e+00 (1.0610e+00)\n",
      "Epoch: [12][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5410e-01 (1.0614e+00)\n",
      "Epoch: [12][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1866e+00 (1.0617e+00)\n",
      "Test: [  0/196]\tTime  3.375 ( 3.375)\tLoss 6.5323e-01 (6.5323e-01)\tAcc@1  81.25 ( 81.25)\tAcc@5  96.48 ( 96.48)\n",
      "Test: [ 50/196]\tTime  0.378 ( 0.436)\tLoss 5.2324e-01 (8.3940e-01)\tAcc@1  87.50 ( 77.72)\tAcc@5  96.48 ( 94.23)\n",
      "Test: [100/196]\tTime  0.378 ( 0.407)\tLoss 1.4116e+00 (9.7806e-01)\tAcc@1  63.28 ( 74.64)\tAcc@5  87.89 ( 92.72)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.1879e+00 (1.0970e+00)\tAcc@1  73.83 ( 72.39)\tAcc@5  89.45 ( 91.13)\n",
      "epoch 12 1.0616191091124174 71.36199951171875 0.003500000000000001 2353956 0.10040116859931866\n",
      "Epoch: [13][   0/5005]\tTime  2.892 ( 2.892)\tData  2.331 ( 2.331)\tLoss 1.1572e+00 (1.1572e+00)\n",
      "Epoch: [13][  50/5005]\tTime  0.559 ( 0.605)\tData  0.000 ( 0.046)\tLoss 9.3256e-01 (1.0505e+00)\n",
      "Epoch: [13][ 100/5005]\tTime  0.559 ( 0.582)\tData  0.000 ( 0.023)\tLoss 9.4951e-01 (1.0270e+00)\n",
      "Epoch: [13][ 150/5005]\tTime  0.559 ( 0.575)\tData  0.000 ( 0.016)\tLoss 9.5160e-01 (1.0212e+00)\n",
      "Epoch: [13][ 200/5005]\tTime  0.559 ( 0.571)\tData  0.000 ( 0.012)\tLoss 9.9115e-01 (1.0268e+00)\n",
      "Epoch: [13][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.009)\tLoss 9.1074e-01 (1.0247e+00)\n",
      "Epoch: [13][ 300/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 1.0065e+00 (1.0278e+00)\n",
      "Epoch: [13][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.0391e+00 (1.0303e+00)\n",
      "Epoch: [13][ 400/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.1038e+00 (1.0312e+00)\n",
      "Epoch: [13][ 450/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0315e+00 (1.0321e+00)\n",
      "Epoch: [13][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.2017e+00 (1.0343e+00)\n",
      "Epoch: [13][ 550/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0784e+00 (1.0324e+00)\n",
      "Epoch: [13][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.5030e-01 (1.0325e+00)\n",
      "Epoch: [13][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.9350e-01 (1.0309e+00)\n",
      "Epoch: [13][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0385e+00 (1.0308e+00)\n",
      "Epoch: [13][ 750/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.3008e-01 (1.0315e+00)\n",
      "Epoch: [13][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.3730e-01 (1.0314e+00)\n",
      "Epoch: [13][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2545e+00 (1.0318e+00)\n",
      "Epoch: [13][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.2705e-01 (1.0329e+00)\n",
      "Epoch: [13][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0903e+00 (1.0322e+00)\n",
      "Epoch: [13][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.0226e-01 (1.0315e+00)\n",
      "Epoch: [13][1050/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.0289e-01 (1.0317e+00)\n",
      "Epoch: [13][1100/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2183e+00 (1.0324e+00)\n",
      "Epoch: [13][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.7798e-01 (1.0317e+00)\n",
      "Epoch: [13][1200/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0638e+00 (1.0323e+00)\n",
      "Epoch: [13][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1520e+00 (1.0318e+00)\n",
      "Epoch: [13][1300/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0345e+00 (1.0315e+00)\n",
      "Epoch: [13][1350/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0030e+00 (1.0328e+00)\n",
      "Epoch: [13][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.6052e-01 (1.0320e+00)\n",
      "Epoch: [13][1450/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.7015e-01 (1.0318e+00)\n",
      "Epoch: [13][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.0717e-01 (1.0326e+00)\n",
      "Epoch: [13][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0821e+00 (1.0328e+00)\n",
      "Epoch: [13][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1901e+00 (1.0337e+00)\n",
      "Epoch: [13][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1433e+00 (1.0354e+00)\n",
      "Epoch: [13][1700/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1359e+00 (1.0362e+00)\n",
      "Epoch: [13][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0837e+00 (1.0368e+00)\n",
      "Epoch: [13][1800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8433e-01 (1.0365e+00)\n",
      "Epoch: [13][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0684e+00 (1.0369e+00)\n",
      "Epoch: [13][1900/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0239e+00 (1.0367e+00)\n",
      "Epoch: [13][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1373e+00 (1.0368e+00)\n",
      "Epoch: [13][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1348e+00 (1.0372e+00)\n",
      "Epoch: [13][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1750e+00 (1.0378e+00)\n",
      "Epoch: [13][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4978e-01 (1.0379e+00)\n",
      "Epoch: [13][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0502e+00 (1.0376e+00)\n",
      "Epoch: [13][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0525e+00 (1.0380e+00)\n",
      "Epoch: [13][2250/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9384e-01 (1.0380e+00)\n",
      "Epoch: [13][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4116e-01 (1.0381e+00)\n",
      "Epoch: [13][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1244e+00 (1.0387e+00)\n",
      "Epoch: [13][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0637e+00 (1.0392e+00)\n",
      "Epoch: [13][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1150e-01 (1.0390e+00)\n",
      "Epoch: [13][2500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7651e-01 (1.0390e+00)\n",
      "Epoch: [13][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0015e+00 (1.0392e+00)\n",
      "Epoch: [13][2600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1725e+00 (1.0394e+00)\n",
      "Epoch: [13][2650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1976e+00 (1.0391e+00)\n",
      "Epoch: [13][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0419e+00 (1.0392e+00)\n",
      "Epoch: [13][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0596e+00 (1.0387e+00)\n",
      "Epoch: [13][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0398e+00 (1.0389e+00)\n",
      "Epoch: [13][2850/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8104e-01 (1.0390e+00)\n",
      "Epoch: [13][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0518e+00 (1.0394e+00)\n",
      "Epoch: [13][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3451e-01 (1.0396e+00)\n",
      "Epoch: [13][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7939e-01 (1.0394e+00)\n",
      "Epoch: [13][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2888e-01 (1.0396e+00)\n",
      "Epoch: [13][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7325e-01 (1.0398e+00)\n",
      "Epoch: [13][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8036e-01 (1.0397e+00)\n",
      "Epoch: [13][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9222e-01 (1.0399e+00)\n",
      "Epoch: [13][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1417e+00 (1.0396e+00)\n",
      "Epoch: [13][3300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0126e+00 (1.0401e+00)\n",
      "Epoch: [13][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3813e-01 (1.0401e+00)\n",
      "Epoch: [13][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0358e+00 (1.0404e+00)\n",
      "Epoch: [13][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0306e+00 (1.0405e+00)\n",
      "Epoch: [13][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9705e-01 (1.0404e+00)\n",
      "Epoch: [13][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2257e+00 (1.0407e+00)\n",
      "Epoch: [13][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2335e+00 (1.0406e+00)\n",
      "Epoch: [13][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3395e-01 (1.0408e+00)\n",
      "Epoch: [13][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0013e+00 (1.0412e+00)\n",
      "Epoch: [13][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2672e+00 (1.0414e+00)\n",
      "Epoch: [13][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5300e-01 (1.0415e+00)\n",
      "Epoch: [13][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2046e+00 (1.0417e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [13][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0936e+00 (1.0416e+00)\n",
      "Epoch: [13][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0880e+00 (1.0414e+00)\n",
      "Epoch: [13][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2639e-01 (1.0414e+00)\n",
      "Epoch: [13][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0282e+00 (1.0416e+00)\n",
      "Epoch: [13][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1386e+00 (1.0417e+00)\n",
      "Epoch: [13][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0535e+00 (1.0416e+00)\n",
      "Epoch: [13][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0368e+00 (1.0416e+00)\n",
      "Epoch: [13][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2879e+00 (1.0419e+00)\n",
      "Epoch: [13][4300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0673e+00 (1.0416e+00)\n",
      "Epoch: [13][4350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1610e+00 (1.0416e+00)\n",
      "Epoch: [13][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1215e+00 (1.0419e+00)\n",
      "Epoch: [13][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1380e-01 (1.0418e+00)\n",
      "Epoch: [13][4500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1821e+00 (1.0420e+00)\n",
      "Epoch: [13][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0753e+00 (1.0420e+00)\n",
      "Epoch: [13][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3228e-01 (1.0422e+00)\n",
      "Epoch: [13][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0868e+00 (1.0423e+00)\n",
      "Epoch: [13][4700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0529e+00 (1.0424e+00)\n",
      "Epoch: [13][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0649e+00 (1.0423e+00)\n",
      "Epoch: [13][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2456e+00 (1.0424e+00)\n",
      "Epoch: [13][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1178e+00 (1.0426e+00)\n",
      "Epoch: [13][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6642e-01 (1.0426e+00)\n",
      "Epoch: [13][4950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1268e+00 (1.0427e+00)\n",
      "Epoch: [13][5000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2506e+00 (1.0428e+00)\n",
      "Test: [  0/196]\tTime  3.344 ( 3.344)\tLoss 6.4015e-01 (6.4015e-01)\tAcc@1  83.98 ( 83.98)\tAcc@5  95.31 ( 95.31)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 6.0849e-01 (8.5014e-01)\tAcc@1  84.77 ( 77.50)\tAcc@5  95.31 ( 94.20)\n",
      "Test: [100/196]\tTime  0.378 ( 0.407)\tLoss 1.5930e+00 (9.8373e-01)\tAcc@1  58.20 ( 74.69)\tAcc@5  86.72 ( 92.68)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.2284e+00 (1.1044e+00)\tAcc@1  73.44 ( 72.16)\tAcc@5  88.28 ( 91.02)\n",
      "epoch 13 1.0427952382482826 71.1240005493164 0.0030000000000000014 2353956 0.10040116859931866\n",
      "Epoch: [14][   0/5005]\tTime  3.242 ( 3.242)\tData  2.680 ( 2.680)\tLoss 8.7131e-01 (8.7131e-01)\n",
      "Epoch: [14][  50/5005]\tTime  0.560 ( 0.612)\tData  0.000 ( 0.053)\tLoss 1.0288e+00 (9.8526e-01)\n",
      "Epoch: [14][ 100/5005]\tTime  0.559 ( 0.586)\tData  0.000 ( 0.027)\tLoss 9.7113e-01 (9.8594e-01)\n",
      "Epoch: [14][ 150/5005]\tTime  0.559 ( 0.577)\tData  0.000 ( 0.018)\tLoss 1.0050e+00 (9.9871e-01)\n",
      "Epoch: [14][ 200/5005]\tTime  0.559 ( 0.573)\tData  0.000 ( 0.014)\tLoss 1.0370e+00 (9.9753e-01)\n",
      "Epoch: [14][ 250/5005]\tTime  0.560 ( 0.570)\tData  0.000 ( 0.011)\tLoss 1.0379e+00 (9.9823e-01)\n",
      "Epoch: [14][ 300/5005]\tTime  0.559 ( 0.568)\tData  0.000 ( 0.009)\tLoss 1.1675e+00 (1.0042e+00)\n",
      "Epoch: [14][ 350/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 9.3517e-01 (1.0063e+00)\n",
      "Epoch: [14][ 400/5005]\tTime  0.560 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.0445e+00 (1.0055e+00)\n",
      "Epoch: [14][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 8.9496e-01 (1.0045e+00)\n",
      "Epoch: [14][ 500/5005]\tTime  0.560 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.0517e+00 (1.0043e+00)\n",
      "Epoch: [14][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.2320e+00 (1.0022e+00)\n",
      "Epoch: [14][ 600/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0432e+00 (1.0026e+00)\n",
      "Epoch: [14][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0962e+00 (1.0033e+00)\n",
      "Epoch: [14][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.6302e-01 (1.0051e+00)\n",
      "Epoch: [14][ 750/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.2204e+00 (1.0065e+00)\n",
      "Epoch: [14][ 800/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0007e+00 (1.0056e+00)\n",
      "Epoch: [14][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1426e+00 (1.0059e+00)\n",
      "Epoch: [14][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.5133e-01 (1.0060e+00)\n",
      "Epoch: [14][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0940e+00 (1.0074e+00)\n",
      "Epoch: [14][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0598e+00 (1.0077e+00)\n",
      "Epoch: [14][1050/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.7775e-01 (1.0089e+00)\n",
      "Epoch: [14][1100/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1797e+00 (1.0100e+00)\n",
      "Epoch: [14][1150/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.4570e-01 (1.0095e+00)\n",
      "Epoch: [14][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1121e+00 (1.0107e+00)\n",
      "Epoch: [14][1250/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.7499e-01 (1.0111e+00)\n",
      "Epoch: [14][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0496e+00 (1.0115e+00)\n",
      "Epoch: [14][1350/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.5178e-01 (1.0111e+00)\n",
      "Epoch: [14][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1191e+00 (1.0111e+00)\n",
      "Epoch: [14][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1512e+00 (1.0117e+00)\n",
      "Epoch: [14][1500/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0615e+00 (1.0121e+00)\n",
      "Epoch: [14][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0835e+00 (1.0123e+00)\n",
      "Epoch: [14][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.5937e-01 (1.0128e+00)\n",
      "Epoch: [14][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.7218e-01 (1.0133e+00)\n",
      "Epoch: [14][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.2365e-01 (1.0132e+00)\n",
      "Epoch: [14][1750/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.2987e-01 (1.0137e+00)\n",
      "Epoch: [14][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.6798e-01 (1.0141e+00)\n",
      "Epoch: [14][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1159e+00 (1.0143e+00)\n",
      "Epoch: [14][1900/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0136e+00 (1.0147e+00)\n",
      "Epoch: [14][1950/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8864e-01 (1.0155e+00)\n",
      "Epoch: [14][2000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.8304e-01 (1.0158e+00)\n",
      "Epoch: [14][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0001e+00 (1.0153e+00)\n",
      "Epoch: [14][2100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7455e-01 (1.0152e+00)\n",
      "Epoch: [14][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1455e+00 (1.0146e+00)\n",
      "Epoch: [14][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0404e+00 (1.0149e+00)\n",
      "Epoch: [14][2250/5005]\tTime  0.557 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0286e+00 (1.0152e+00)\n",
      "Epoch: [14][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8028e-01 (1.0152e+00)\n",
      "Epoch: [14][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8669e-01 (1.0156e+00)\n",
      "Epoch: [14][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0693e-01 (1.0159e+00)\n",
      "Epoch: [14][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0484e+00 (1.0168e+00)\n",
      "Epoch: [14][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4368e-01 (1.0172e+00)\n",
      "Epoch: [14][2550/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5931e-01 (1.0172e+00)\n",
      "Epoch: [14][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0993e+00 (1.0171e+00)\n",
      "Epoch: [14][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7380e-01 (1.0174e+00)\n",
      "Epoch: [14][2700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9877e-01 (1.0174e+00)\n",
      "Epoch: [14][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0615e+00 (1.0179e+00)\n",
      "Epoch: [14][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5121e-01 (1.0186e+00)\n",
      "Epoch: [14][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0760e+00 (1.0187e+00)\n",
      "Epoch: [14][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0464e+00 (1.0191e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [14][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4935e-01 (1.0194e+00)\n",
      "Epoch: [14][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1270e+00 (1.0197e+00)\n",
      "Epoch: [14][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1166e+00 (1.0197e+00)\n",
      "Epoch: [14][3100/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1198e+00 (1.0199e+00)\n",
      "Epoch: [14][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0179e+00 (1.0197e+00)\n",
      "Epoch: [14][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5509e-01 (1.0202e+00)\n",
      "Epoch: [14][3250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0393e-01 (1.0202e+00)\n",
      "Epoch: [14][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0269e+00 (1.0203e+00)\n",
      "Epoch: [14][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2353e-01 (1.0199e+00)\n",
      "Epoch: [14][3400/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3773e-01 (1.0203e+00)\n",
      "Epoch: [14][3450/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2559e-01 (1.0209e+00)\n",
      "Epoch: [14][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1952e+00 (1.0211e+00)\n",
      "Epoch: [14][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8115e-01 (1.0211e+00)\n",
      "Epoch: [14][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0941e+00 (1.0210e+00)\n",
      "Epoch: [14][3650/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1523e+00 (1.0214e+00)\n",
      "Epoch: [14][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0042e+00 (1.0215e+00)\n",
      "Epoch: [14][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4968e-01 (1.0216e+00)\n",
      "Epoch: [14][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1359e+00 (1.0220e+00)\n",
      "Epoch: [14][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3895e-01 (1.0223e+00)\n",
      "Epoch: [14][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0739e+00 (1.0226e+00)\n",
      "Epoch: [14][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0801e+00 (1.0228e+00)\n",
      "Epoch: [14][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0207e+00 (1.0228e+00)\n",
      "Epoch: [14][4050/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2074e-01 (1.0229e+00)\n",
      "Epoch: [14][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7597e-01 (1.0231e+00)\n",
      "Epoch: [14][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0851e+00 (1.0232e+00)\n",
      "Epoch: [14][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1024e+00 (1.0231e+00)\n",
      "Epoch: [14][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9856e-01 (1.0233e+00)\n",
      "Epoch: [14][4300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1461e-01 (1.0236e+00)\n",
      "Epoch: [14][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7463e-01 (1.0236e+00)\n",
      "Epoch: [14][4400/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0931e+00 (1.0236e+00)\n",
      "Epoch: [14][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1194e+00 (1.0237e+00)\n",
      "Epoch: [14][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0659e+00 (1.0235e+00)\n",
      "Epoch: [14][4550/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0001e+00 (1.0237e+00)\n",
      "Epoch: [14][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1751e+00 (1.0242e+00)\n",
      "Epoch: [14][4650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9322e-01 (1.0239e+00)\n",
      "Epoch: [14][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7111e-01 (1.0240e+00)\n",
      "Epoch: [14][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8544e-01 (1.0242e+00)\n",
      "Epoch: [14][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0774e+00 (1.0240e+00)\n",
      "Epoch: [14][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1301e+00 (1.0240e+00)\n",
      "Epoch: [14][4900/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1036e+00 (1.0241e+00)\n",
      "Epoch: [14][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7783e-01 (1.0245e+00)\n",
      "Epoch: [14][5000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0385e+00 (1.0248e+00)\n",
      "Test: [  0/196]\tTime  3.297 ( 3.297)\tLoss 6.1811e-01 (6.1811e-01)\tAcc@1  83.20 ( 83.20)\tAcc@5  96.48 ( 96.48)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.434)\tLoss 5.6839e-01 (8.3146e-01)\tAcc@1  85.94 ( 77.57)\tAcc@5  96.48 ( 94.28)\n",
      "Test: [100/196]\tTime  0.378 ( 0.406)\tLoss 1.5443e+00 (9.6614e-01)\tAcc@1  60.94 ( 74.72)\tAcc@5  85.94 ( 92.71)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.3673e+00 (1.0842e+00)\tAcc@1  68.36 ( 72.45)\tAcc@5  86.33 ( 91.16)\n",
      "epoch 14 1.024849782939939 71.50599670410156 0.002500000000000001 2353956 0.10040116859931866\n",
      "Epoch: [15][   0/5005]\tTime  3.353 ( 3.353)\tData  2.788 ( 2.788)\tLoss 1.0782e+00 (1.0782e+00)\n",
      "Epoch: [15][  50/5005]\tTime  0.559 ( 0.614)\tData  0.000 ( 0.055)\tLoss 8.8102e-01 (1.0026e+00)\n",
      "Epoch: [15][ 100/5005]\tTime  0.559 ( 0.587)\tData  0.000 ( 0.028)\tLoss 8.3839e-01 (9.9292e-01)\n",
      "Epoch: [15][ 150/5005]\tTime  0.557 ( 0.578)\tData  0.000 ( 0.019)\tLoss 1.0706e+00 (9.9546e-01)\n",
      "Epoch: [15][ 200/5005]\tTime  0.559 ( 0.573)\tData  0.000 ( 0.014)\tLoss 1.1176e+00 (9.9737e-01)\n",
      "Epoch: [15][ 250/5005]\tTime  0.559 ( 0.570)\tData  0.000 ( 0.011)\tLoss 1.0387e+00 (9.9638e-01)\n",
      "Epoch: [15][ 300/5005]\tTime  0.560 ( 0.569)\tData  0.000 ( 0.009)\tLoss 8.5534e-01 (9.9327e-01)\n",
      "Epoch: [15][ 350/5005]\tTime  0.558 ( 0.567)\tData  0.000 ( 0.008)\tLoss 1.0751e+00 (9.9298e-01)\n",
      "Epoch: [15][ 400/5005]\tTime  0.560 ( 0.566)\tData  0.000 ( 0.007)\tLoss 9.0998e-01 (9.9305e-01)\n",
      "Epoch: [15][ 450/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.006)\tLoss 1.0022e+00 (9.9437e-01)\n",
      "Epoch: [15][ 500/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 9.6512e-01 (9.9380e-01)\n",
      "Epoch: [15][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 1.0468e+00 (9.9518e-01)\n",
      "Epoch: [15][ 600/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.8110e-01 (9.9687e-01)\n",
      "Epoch: [15][ 650/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.004)\tLoss 1.1052e+00 (9.9600e-01)\n",
      "Epoch: [15][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.6311e-01 (9.9644e-01)\n",
      "Epoch: [15][ 750/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.2083e-01 (9.9589e-01)\n",
      "Epoch: [15][ 800/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.5328e-01 (9.9605e-01)\n",
      "Epoch: [15][ 850/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.003)\tLoss 9.5746e-01 (9.9703e-01)\n",
      "Epoch: [15][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.5226e-01 (9.9629e-01)\n",
      "Epoch: [15][ 950/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.2854e+00 (9.9516e-01)\n",
      "Epoch: [15][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0405e+00 (9.9524e-01)\n",
      "Epoch: [15][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0224e+00 (9.9445e-01)\n",
      "Epoch: [15][1100/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.9637e-01 (9.9428e-01)\n",
      "Epoch: [15][1150/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1527e+00 (9.9456e-01)\n",
      "Epoch: [15][1200/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0921e+00 (9.9525e-01)\n",
      "Epoch: [15][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.6037e-01 (9.9555e-01)\n",
      "Epoch: [15][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.9919e-01 (9.9576e-01)\n",
      "Epoch: [15][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1727e-01 (9.9582e-01)\n",
      "Epoch: [15][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0705e+00 (9.9647e-01)\n",
      "Epoch: [15][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1205e+00 (9.9654e-01)\n",
      "Epoch: [15][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5669e-01 (9.9611e-01)\n",
      "Epoch: [15][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0298e+00 (9.9751e-01)\n",
      "Epoch: [15][1600/5005]\tTime  0.562 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0116e+00 (9.9761e-01)\n",
      "Epoch: [15][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5540e-01 (9.9736e-01)\n",
      "Epoch: [15][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.7016e-01 (9.9794e-01)\n",
      "Epoch: [15][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0347e+00 (9.9763e-01)\n",
      "Epoch: [15][1800/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.9204e-01 (9.9794e-01)\n",
      "Epoch: [15][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0108e+00 (9.9835e-01)\n",
      "Epoch: [15][1900/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1491e-01 (9.9822e-01)\n",
      "Epoch: [15][1950/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1086e+00 (9.9904e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [15][2000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8435e-01 (9.9938e-01)\n",
      "Epoch: [15][2050/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0656e+00 (9.9917e-01)\n",
      "Epoch: [15][2100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.9064e-01 (9.9885e-01)\n",
      "Epoch: [15][2150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 9.1140e-01 (9.9893e-01)\n",
      "Epoch: [15][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5707e-01 (9.9848e-01)\n",
      "Epoch: [15][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9017e-01 (9.9877e-01)\n",
      "Epoch: [15][2300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6033e-01 (9.9895e-01)\n",
      "Epoch: [15][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0441e+00 (9.9942e-01)\n",
      "Epoch: [15][2400/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5871e-01 (9.9938e-01)\n",
      "Epoch: [15][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5966e-01 (9.9918e-01)\n",
      "Epoch: [15][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0278e+00 (9.9919e-01)\n",
      "Epoch: [15][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1480e+00 (9.9927e-01)\n",
      "Epoch: [15][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0364e+00 (9.9953e-01)\n",
      "Epoch: [15][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1107e-01 (9.9982e-01)\n",
      "Epoch: [15][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7000e-01 (9.9961e-01)\n",
      "Epoch: [15][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1577e+00 (9.9987e-01)\n",
      "Epoch: [15][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1726e+00 (1.0003e+00)\n",
      "Epoch: [15][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3765e-01 (1.0008e+00)\n",
      "Epoch: [15][2900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0114e+00 (1.0010e+00)\n",
      "Epoch: [15][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0869e+00 (1.0009e+00)\n",
      "Epoch: [15][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0998e+00 (1.0009e+00)\n",
      "Epoch: [15][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0431e+00 (1.0011e+00)\n",
      "Epoch: [15][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6997e-01 (1.0011e+00)\n",
      "Epoch: [15][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0163e+00 (1.0010e+00)\n",
      "Epoch: [15][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4097e-01 (1.0011e+00)\n",
      "Epoch: [15][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0228e+00 (1.0014e+00)\n",
      "Epoch: [15][3300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3163e-01 (1.0014e+00)\n",
      "Epoch: [15][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1007e+00 (1.0013e+00)\n",
      "Epoch: [15][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9887e-01 (1.0017e+00)\n",
      "Epoch: [15][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0258e+00 (1.0016e+00)\n",
      "Epoch: [15][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6951e-01 (1.0019e+00)\n",
      "Epoch: [15][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4206e-01 (1.0019e+00)\n",
      "Epoch: [15][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9142e-01 (1.0025e+00)\n",
      "Epoch: [15][3650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3322e-01 (1.0024e+00)\n",
      "Epoch: [15][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3451e-01 (1.0020e+00)\n",
      "Epoch: [15][3750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4045e-01 (1.0021e+00)\n",
      "Epoch: [15][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6992e-01 (1.0022e+00)\n",
      "Epoch: [15][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2429e-01 (1.0024e+00)\n",
      "Epoch: [15][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1394e+00 (1.0027e+00)\n",
      "Epoch: [15][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1768e+00 (1.0032e+00)\n",
      "Epoch: [15][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0842e-01 (1.0028e+00)\n",
      "Epoch: [15][4050/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1185e+00 (1.0027e+00)\n",
      "Epoch: [15][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0056e+00 (1.0027e+00)\n",
      "Epoch: [15][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1867e+00 (1.0028e+00)\n",
      "Epoch: [15][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0566e+00 (1.0029e+00)\n",
      "Epoch: [15][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9089e-01 (1.0029e+00)\n",
      "Epoch: [15][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6978e-01 (1.0029e+00)\n",
      "Epoch: [15][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1504e+00 (1.0031e+00)\n",
      "Epoch: [15][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2368e-01 (1.0034e+00)\n",
      "Epoch: [15][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0220e+00 (1.0037e+00)\n",
      "Epoch: [15][4500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6370e-01 (1.0038e+00)\n",
      "Epoch: [15][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5732e-01 (1.0041e+00)\n",
      "Epoch: [15][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2203e-01 (1.0041e+00)\n",
      "Epoch: [15][4650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0083e+00 (1.0044e+00)\n",
      "Epoch: [15][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1439e+00 (1.0047e+00)\n",
      "Epoch: [15][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5810e-01 (1.0048e+00)\n",
      "Epoch: [15][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1145e+00 (1.0049e+00)\n",
      "Epoch: [15][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0564e-01 (1.0047e+00)\n",
      "Epoch: [15][4900/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4785e-01 (1.0047e+00)\n",
      "Epoch: [15][4950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8535e-01 (1.0047e+00)\n",
      "Epoch: [15][5000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7717e-01 (1.0050e+00)\n",
      "Test: [  0/196]\tTime  3.347 ( 3.347)\tLoss 6.0842e-01 (6.0842e-01)\tAcc@1  83.98 ( 83.98)\tAcc@5  96.48 ( 96.48)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 5.3009e-01 (8.1526e-01)\tAcc@1  86.33 ( 78.28)\tAcc@5  96.48 ( 94.43)\n",
      "Test: [100/196]\tTime  0.377 ( 0.407)\tLoss 1.4883e+00 (9.4830e-01)\tAcc@1  59.77 ( 75.24)\tAcc@5  86.72 ( 92.96)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.1745e+00 (1.0685e+00)\tAcc@1  76.56 ( 73.00)\tAcc@5  87.89 ( 91.37)\n",
      "epoch 15 1.0049841605255792 71.94999694824219 0.0020000000000000005 2353956 0.10040116859931866\n",
      "Epoch: [16][   0/5005]\tTime  2.976 ( 2.976)\tData  2.409 ( 2.409)\tLoss 9.1098e-01 (9.1098e-01)\n",
      "Epoch: [16][  50/5005]\tTime  0.560 ( 0.607)\tData  0.000 ( 0.047)\tLoss 9.6423e-01 (9.8797e-01)\n",
      "Epoch: [16][ 100/5005]\tTime  0.559 ( 0.583)\tData  0.000 ( 0.024)\tLoss 8.9552e-01 (9.8751e-01)\n",
      "Epoch: [16][ 150/5005]\tTime  0.560 ( 0.575)\tData  0.000 ( 0.016)\tLoss 1.0841e+00 (9.8917e-01)\n",
      "Epoch: [16][ 200/5005]\tTime  0.559 ( 0.571)\tData  0.000 ( 0.012)\tLoss 9.1238e-01 (9.8025e-01)\n",
      "Epoch: [16][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.010)\tLoss 9.7992e-01 (9.7379e-01)\n",
      "Epoch: [16][ 300/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 9.0944e-01 (9.7623e-01)\n",
      "Epoch: [16][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.0896e+00 (9.7921e-01)\n",
      "Epoch: [16][ 400/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.1422e+00 (9.7875e-01)\n",
      "Epoch: [16][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.0357e+00 (9.7922e-01)\n",
      "Epoch: [16][ 500/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.2086e-01 (9.7908e-01)\n",
      "Epoch: [16][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 8.6448e-01 (9.7888e-01)\n",
      "Epoch: [16][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0393e+00 (9.8107e-01)\n",
      "Epoch: [16][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.0544e-01 (9.7977e-01)\n",
      "Epoch: [16][ 700/5005]\tTime  0.558 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.4720e-01 (9.7859e-01)\n",
      "Epoch: [16][ 750/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.9684e-01 (9.7780e-01)\n",
      "Epoch: [16][ 800/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.8302e-01 (9.7754e-01)\n",
      "Epoch: [16][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.6630e-01 (9.7717e-01)\n",
      "Epoch: [16][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1161e+00 (9.7579e-01)\n",
      "Epoch: [16][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.3870e-01 (9.7560e-01)\n",
      "Epoch: [16][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.0890e-01 (9.7492e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [16][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.002)\tLoss 1.1207e+00 (9.7553e-01)\n",
      "Epoch: [16][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2031e+00 (9.7635e-01)\n",
      "Epoch: [16][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8007e-01 (9.7719e-01)\n",
      "Epoch: [16][1200/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.4168e-01 (9.7755e-01)\n",
      "Epoch: [16][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.5214e-01 (9.7743e-01)\n",
      "Epoch: [16][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0205e+00 (9.7657e-01)\n",
      "Epoch: [16][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0567e+00 (9.7721e-01)\n",
      "Epoch: [16][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0068e+00 (9.7651e-01)\n",
      "Epoch: [16][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.0601e-01 (9.7722e-01)\n",
      "Epoch: [16][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.6612e-01 (9.7657e-01)\n",
      "Epoch: [16][1550/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1027e+00 (9.7771e-01)\n",
      "Epoch: [16][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0727e+00 (9.7875e-01)\n",
      "Epoch: [16][1650/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.8475e-01 (9.7841e-01)\n",
      "Epoch: [16][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.4914e-01 (9.7904e-01)\n",
      "Epoch: [16][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8586e-01 (9.7936e-01)\n",
      "Epoch: [16][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.2403e+00 (9.7912e-01)\n",
      "Epoch: [16][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 9.7611e-01 (9.7888e-01)\n",
      "Epoch: [16][1900/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 9.1550e-01 (9.7895e-01)\n",
      "Epoch: [16][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4572e-01 (9.7936e-01)\n",
      "Epoch: [16][2000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6953e-01 (9.7895e-01)\n",
      "Epoch: [16][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7167e-01 (9.7921e-01)\n",
      "Epoch: [16][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0356e+00 (9.7859e-01)\n",
      "Epoch: [16][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3872e-01 (9.7870e-01)\n",
      "Epoch: [16][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0991e+00 (9.7916e-01)\n",
      "Epoch: [16][2250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2773e-01 (9.7913e-01)\n",
      "Epoch: [16][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.6508e-01 (9.7886e-01)\n",
      "Epoch: [16][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0363e+00 (9.7938e-01)\n",
      "Epoch: [16][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1543e+00 (9.7998e-01)\n",
      "Epoch: [16][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5511e-01 (9.8027e-01)\n",
      "Epoch: [16][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7838e-01 (9.8075e-01)\n",
      "Epoch: [16][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0295e+00 (9.8094e-01)\n",
      "Epoch: [16][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1189e-01 (9.8114e-01)\n",
      "Epoch: [16][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5957e-01 (9.8138e-01)\n",
      "Epoch: [16][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.0197e-01 (9.8144e-01)\n",
      "Epoch: [16][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3156e-01 (9.8175e-01)\n",
      "Epoch: [16][2800/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6277e-01 (9.8174e-01)\n",
      "Epoch: [16][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9796e-01 (9.8209e-01)\n",
      "Epoch: [16][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4683e-01 (9.8229e-01)\n",
      "Epoch: [16][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0979e+00 (9.8235e-01)\n",
      "Epoch: [16][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4055e-01 (9.8207e-01)\n",
      "Epoch: [16][3050/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6119e-01 (9.8190e-01)\n",
      "Epoch: [16][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0583e+00 (9.8165e-01)\n",
      "Epoch: [16][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0477e+00 (9.8174e-01)\n",
      "Epoch: [16][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1750e+00 (9.8218e-01)\n",
      "Epoch: [16][3250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7059e-01 (9.8170e-01)\n",
      "Epoch: [16][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1603e-01 (9.8201e-01)\n",
      "Epoch: [16][3350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3944e-01 (9.8201e-01)\n",
      "Epoch: [16][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1686e+00 (9.8190e-01)\n",
      "Epoch: [16][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0722e-01 (9.8188e-01)\n",
      "Epoch: [16][3500/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0688e+00 (9.8194e-01)\n",
      "Epoch: [16][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1872e-01 (9.8193e-01)\n",
      "Epoch: [16][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5342e-01 (9.8155e-01)\n",
      "Epoch: [16][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7104e-01 (9.8175e-01)\n",
      "Epoch: [16][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4022e-01 (9.8161e-01)\n",
      "Epoch: [16][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1354e+00 (9.8195e-01)\n",
      "Epoch: [16][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0524e+00 (9.8206e-01)\n",
      "Epoch: [16][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8274e-01 (9.8240e-01)\n",
      "Epoch: [16][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0494e+00 (9.8226e-01)\n",
      "Epoch: [16][3950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1484e-01 (9.8266e-01)\n",
      "Epoch: [16][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6232e-01 (9.8292e-01)\n",
      "Epoch: [16][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2275e+00 (9.8283e-01)\n",
      "Epoch: [16][4100/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1160e+00 (9.8278e-01)\n",
      "Epoch: [16][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0566e+00 (9.8305e-01)\n",
      "Epoch: [16][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4797e-01 (9.8306e-01)\n",
      "Epoch: [16][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0095e+00 (9.8317e-01)\n",
      "Epoch: [16][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0979e+00 (9.8314e-01)\n",
      "Epoch: [16][4350/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9426e-01 (9.8340e-01)\n",
      "Epoch: [16][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2757e-01 (9.8354e-01)\n",
      "Epoch: [16][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1062e+00 (9.8366e-01)\n",
      "Epoch: [16][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0362e+00 (9.8404e-01)\n",
      "Epoch: [16][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1022e+00 (9.8445e-01)\n",
      "Epoch: [16][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0033e+00 (9.8476e-01)\n",
      "Epoch: [16][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0727e+00 (9.8479e-01)\n",
      "Epoch: [16][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6636e-01 (9.8491e-01)\n",
      "Epoch: [16][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5521e-01 (9.8472e-01)\n",
      "Epoch: [16][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8086e-01 (9.8469e-01)\n",
      "Epoch: [16][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5280e-01 (9.8476e-01)\n",
      "Epoch: [16][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0783e+00 (9.8469e-01)\n",
      "Epoch: [16][4950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1133e+00 (9.8441e-01)\n",
      "Epoch: [16][5000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0692e+00 (9.8465e-01)\n",
      "Test: [  0/196]\tTime  3.361 ( 3.361)\tLoss 5.9995e-01 (5.9995e-01)\tAcc@1  85.16 ( 85.16)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.378 ( 0.436)\tLoss 4.6744e-01 (8.0341e-01)\tAcc@1  89.06 ( 78.84)\tAcc@5  96.88 ( 94.59)\n",
      "Test: [100/196]\tTime  0.378 ( 0.407)\tLoss 1.4775e+00 (9.3917e-01)\tAcc@1  62.11 ( 75.81)\tAcc@5  87.50 ( 93.22)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.2067e+00 (1.0554e+00)\tAcc@1  74.61 ( 73.48)\tAcc@5  88.67 ( 91.63)\n",
      "epoch 16 0.9846257691848722 72.46599578857422 0.0015000000000000011 2353956 0.10040116859931866\n",
      "Epoch: [17][   0/5005]\tTime  2.816 ( 2.816)\tData  2.256 ( 2.256)\tLoss 9.9772e-01 (9.9772e-01)\n",
      "Epoch: [17][  50/5005]\tTime  0.559 ( 0.604)\tData  0.000 ( 0.044)\tLoss 9.4089e-01 (9.6777e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [17][ 100/5005]\tTime  0.559 ( 0.582)\tData  0.000 ( 0.023)\tLoss 9.2221e-01 (9.7387e-01)\n",
      "Epoch: [17][ 150/5005]\tTime  0.560 ( 0.574)\tData  0.000 ( 0.015)\tLoss 7.8894e-01 (9.7570e-01)\n",
      "Epoch: [17][ 200/5005]\tTime  0.560 ( 0.570)\tData  0.000 ( 0.011)\tLoss 1.0130e+00 (9.6431e-01)\n",
      "Epoch: [17][ 250/5005]\tTime  0.559 ( 0.568)\tData  0.000 ( 0.009)\tLoss 1.0155e+00 (9.6444e-01)\n",
      "Epoch: [17][ 300/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 1.0555e+00 (9.6541e-01)\n",
      "Epoch: [17][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.0263e+00 (9.6509e-01)\n",
      "Epoch: [17][ 400/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 9.2433e-01 (9.6767e-01)\n",
      "Epoch: [17][ 450/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.8653e-01 (9.6747e-01)\n",
      "Epoch: [17][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.5321e-01 (9.6836e-01)\n",
      "Epoch: [17][ 550/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.0155e-01 (9.6855e-01)\n",
      "Epoch: [17][ 600/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 1.0685e+00 (9.6685e-01)\n",
      "Epoch: [17][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.8274e-01 (9.6522e-01)\n",
      "Epoch: [17][ 700/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.8652e-01 (9.6569e-01)\n",
      "Epoch: [17][ 750/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.1459e-01 (9.6573e-01)\n",
      "Epoch: [17][ 800/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.4791e-01 (9.6614e-01)\n",
      "Epoch: [17][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1052e+00 (9.6718e-01)\n",
      "Epoch: [17][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1699e+00 (9.6566e-01)\n",
      "Epoch: [17][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0563e+00 (9.6615e-01)\n",
      "Epoch: [17][1000/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8041e-01 (9.6549e-01)\n",
      "Epoch: [17][1050/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5389e-01 (9.6635e-01)\n",
      "Epoch: [17][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.8540e-01 (9.6715e-01)\n",
      "Epoch: [17][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0578e+00 (9.6600e-01)\n",
      "Epoch: [17][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1522e+00 (9.6656e-01)\n",
      "Epoch: [17][1250/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0169e+00 (9.6573e-01)\n",
      "Epoch: [17][1300/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0922e+00 (9.6571e-01)\n",
      "Epoch: [17][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0203e+00 (9.6655e-01)\n",
      "Epoch: [17][1400/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.7575e-01 (9.6618e-01)\n",
      "Epoch: [17][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0397e+00 (9.6586e-01)\n",
      "Epoch: [17][1500/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.6126e-01 (9.6589e-01)\n",
      "Epoch: [17][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.9240e-01 (9.6586e-01)\n",
      "Epoch: [17][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.3007e-01 (9.6522e-01)\n",
      "Epoch: [17][1650/5005]\tTime  0.558 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.2087e-01 (9.6557e-01)\n",
      "Epoch: [17][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5883e-01 (9.6492e-01)\n",
      "Epoch: [17][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.001)\tLoss 8.6201e-01 (9.6455e-01)\n",
      "Epoch: [17][1800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6760e-01 (9.6485e-01)\n",
      "Epoch: [17][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6760e-01 (9.6451e-01)\n",
      "Epoch: [17][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2437e-01 (9.6485e-01)\n",
      "Epoch: [17][1950/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9899e-01 (9.6442e-01)\n",
      "Epoch: [17][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9646e-01 (9.6438e-01)\n",
      "Epoch: [17][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7241e-01 (9.6490e-01)\n",
      "Epoch: [17][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1674e+00 (9.6512e-01)\n",
      "Epoch: [17][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0173e+00 (9.6490e-01)\n",
      "Epoch: [17][2200/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0941e+00 (9.6519e-01)\n",
      "Epoch: [17][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5034e-01 (9.6522e-01)\n",
      "Epoch: [17][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2021e-01 (9.6540e-01)\n",
      "Epoch: [17][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8984e-01 (9.6546e-01)\n",
      "Epoch: [17][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0571e+00 (9.6534e-01)\n",
      "Epoch: [17][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5004e-01 (9.6507e-01)\n",
      "Epoch: [17][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1071e+00 (9.6498e-01)\n",
      "Epoch: [17][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0308e+00 (9.6487e-01)\n",
      "Epoch: [17][2600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1142e+00 (9.6534e-01)\n",
      "Epoch: [17][2650/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4944e-01 (9.6534e-01)\n",
      "Epoch: [17][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6618e-01 (9.6507e-01)\n",
      "Epoch: [17][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7928e-01 (9.6504e-01)\n",
      "Epoch: [17][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0414e+00 (9.6514e-01)\n",
      "Epoch: [17][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6877e-01 (9.6503e-01)\n",
      "Epoch: [17][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4276e-01 (9.6528e-01)\n",
      "Epoch: [17][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1056e+00 (9.6515e-01)\n",
      "Epoch: [17][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0784e+00 (9.6506e-01)\n",
      "Epoch: [17][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0462e+00 (9.6496e-01)\n",
      "Epoch: [17][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3133e-01 (9.6485e-01)\n",
      "Epoch: [17][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2565e-01 (9.6493e-01)\n",
      "Epoch: [17][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0519e+00 (9.6515e-01)\n",
      "Epoch: [17][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7858e-01 (9.6515e-01)\n",
      "Epoch: [17][3300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0107e+00 (9.6567e-01)\n",
      "Epoch: [17][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0121e+00 (9.6597e-01)\n",
      "Epoch: [17][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7855e-01 (9.6596e-01)\n",
      "Epoch: [17][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0192e+00 (9.6620e-01)\n",
      "Epoch: [17][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2031e-01 (9.6604e-01)\n",
      "Epoch: [17][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8003e-01 (9.6609e-01)\n",
      "Epoch: [17][3600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1464e+00 (9.6595e-01)\n",
      "Epoch: [17][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6440e-01 (9.6593e-01)\n",
      "Epoch: [17][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1587e+00 (9.6593e-01)\n",
      "Epoch: [17][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0338e+00 (9.6581e-01)\n",
      "Epoch: [17][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9122e-01 (9.6588e-01)\n",
      "Epoch: [17][3850/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6801e-01 (9.6598e-01)\n",
      "Epoch: [17][3900/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3085e-01 (9.6640e-01)\n",
      "Epoch: [17][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8793e-01 (9.6633e-01)\n",
      "Epoch: [17][4000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1001e+00 (9.6648e-01)\n",
      "Epoch: [17][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3051e-01 (9.6648e-01)\n",
      "Epoch: [17][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0340e+00 (9.6639e-01)\n",
      "Epoch: [17][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4320e-01 (9.6618e-01)\n",
      "Epoch: [17][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7847e-01 (9.6594e-01)\n",
      "Epoch: [17][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0011e+00 (9.6635e-01)\n",
      "Epoch: [17][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4745e-01 (9.6640e-01)\n",
      "Epoch: [17][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1056e+00 (9.6617e-01)\n",
      "Epoch: [17][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8298e-01 (9.6605e-01)\n",
      "Epoch: [17][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9375e-01 (9.6606e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [17][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5495e-01 (9.6637e-01)\n",
      "Epoch: [17][4550/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5907e-01 (9.6613e-01)\n",
      "Epoch: [17][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7828e-01 (9.6623e-01)\n",
      "Epoch: [17][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7708e-01 (9.6627e-01)\n",
      "Epoch: [17][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1006e+00 (9.6629e-01)\n",
      "Epoch: [17][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8662e-01 (9.6645e-01)\n",
      "Epoch: [17][4800/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9863e-01 (9.6652e-01)\n",
      "Epoch: [17][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8229e-01 (9.6663e-01)\n",
      "Epoch: [17][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4706e-01 (9.6645e-01)\n",
      "Epoch: [17][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1920e-01 (9.6658e-01)\n",
      "Epoch: [17][5000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4584e-01 (9.6643e-01)\n",
      "Test: [  0/196]\tTime  3.320 ( 3.320)\tLoss 5.8591e-01 (5.8591e-01)\tAcc@1  82.03 ( 82.03)\tAcc@5  96.88 ( 96.88)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 4.5361e-01 (7.9373e-01)\tAcc@1  89.84 ( 78.98)\tAcc@5  97.27 ( 94.75)\n",
      "Test: [100/196]\tTime  0.378 ( 0.406)\tLoss 1.4144e+00 (9.2893e-01)\tAcc@1  61.72 ( 75.87)\tAcc@5  87.89 ( 93.29)\n",
      "Test: [150/196]\tTime  0.377 ( 0.397)\tLoss 1.1708e+00 (1.0460e+00)\tAcc@1  76.17 ( 73.60)\tAcc@5  87.50 ( 91.75)\n",
      "epoch 17 0.966463084890984 72.65399932861328 0.0010000000000000005 2353956 0.10040116859931866\n",
      "Epoch: [18][   0/5005]\tTime  3.123 ( 3.123)\tData  2.563 ( 2.563)\tLoss 9.7025e-01 (9.7025e-01)\n",
      "Epoch: [18][  50/5005]\tTime  0.559 ( 0.610)\tData  0.000 ( 0.050)\tLoss 1.0853e+00 (9.6538e-01)\n",
      "Epoch: [18][ 100/5005]\tTime  0.560 ( 0.585)\tData  0.000 ( 0.026)\tLoss 8.7623e-01 (9.5455e-01)\n",
      "Epoch: [18][ 150/5005]\tTime  0.559 ( 0.576)\tData  0.000 ( 0.017)\tLoss 1.0177e+00 (9.4573e-01)\n",
      "Epoch: [18][ 200/5005]\tTime  0.560 ( 0.572)\tData  0.000 ( 0.013)\tLoss 8.2944e-01 (9.3771e-01)\n",
      "Epoch: [18][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.010)\tLoss 7.4875e-01 (9.3751e-01)\n",
      "Epoch: [18][ 300/5005]\tTime  0.560 ( 0.568)\tData  0.000 ( 0.009)\tLoss 9.8452e-01 (9.3877e-01)\n",
      "Epoch: [18][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.008)\tLoss 8.9107e-01 (9.3766e-01)\n",
      "Epoch: [18][ 400/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 1.0152e+00 (9.3975e-01)\n",
      "Epoch: [18][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.0496e+00 (9.4300e-01)\n",
      "Epoch: [18][ 500/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 6.7213e-01 (9.4217e-01)\n",
      "Epoch: [18][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.0596e-01 (9.4555e-01)\n",
      "Epoch: [18][ 600/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.004)\tLoss 9.8850e-01 (9.4594e-01)\n",
      "Epoch: [18][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.3105e-01 (9.4715e-01)\n",
      "Epoch: [18][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.0761e-01 (9.4733e-01)\n",
      "Epoch: [18][ 750/5005]\tTime  0.560 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.9855e-01 (9.4684e-01)\n",
      "Epoch: [18][ 800/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0059e+00 (9.4653e-01)\n",
      "Epoch: [18][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.8008e-01 (9.4685e-01)\n",
      "Epoch: [18][ 900/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.1706e+00 (9.4666e-01)\n",
      "Epoch: [18][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.0647e-01 (9.4672e-01)\n",
      "Epoch: [18][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.9874e-01 (9.4642e-01)\n",
      "Epoch: [18][1050/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.4221e-01 (9.4678e-01)\n",
      "Epoch: [18][1100/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.7123e-01 (9.4607e-01)\n",
      "Epoch: [18][1150/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.002)\tLoss 9.1020e-01 (9.4634e-01)\n",
      "Epoch: [18][1200/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.4648e-01 (9.4706e-01)\n",
      "Epoch: [18][1250/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.2465e-01 (9.4729e-01)\n",
      "Epoch: [18][1300/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.8044e-01 (9.4662e-01)\n",
      "Epoch: [18][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.7716e-01 (9.4680e-01)\n",
      "Epoch: [18][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.8844e-01 (9.4580e-01)\n",
      "Epoch: [18][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5454e-01 (9.4555e-01)\n",
      "Epoch: [18][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.6159e-01 (9.4462e-01)\n",
      "Epoch: [18][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.0093e-01 (9.4406e-01)\n",
      "Epoch: [18][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.1120e+00 (9.4445e-01)\n",
      "Epoch: [18][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.4709e-01 (9.4433e-01)\n",
      "Epoch: [18][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1727e-01 (9.4391e-01)\n",
      "Epoch: [18][1750/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0448e+00 (9.4466e-01)\n",
      "Epoch: [18][1800/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1778e-01 (9.4504e-01)\n",
      "Epoch: [18][1850/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.5139e-01 (9.4504e-01)\n",
      "Epoch: [18][1900/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1848e-01 (9.4468e-01)\n",
      "Epoch: [18][1950/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0173e+00 (9.4475e-01)\n",
      "Epoch: [18][2000/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.1580e-01 (9.4441e-01)\n",
      "Epoch: [18][2050/5005]\tTime  0.560 ( 0.561)\tData  0.000 ( 0.001)\tLoss 8.9561e-01 (9.4423e-01)\n",
      "Epoch: [18][2100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7297e-01 (9.4465e-01)\n",
      "Epoch: [18][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0140e+00 (9.4492e-01)\n",
      "Epoch: [18][2200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.4029e-01 (9.4506e-01)\n",
      "Epoch: [18][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5462e-01 (9.4500e-01)\n",
      "Epoch: [18][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7853e-01 (9.4455e-01)\n",
      "Epoch: [18][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0046e+00 (9.4445e-01)\n",
      "Epoch: [18][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2928e-01 (9.4453e-01)\n",
      "Epoch: [18][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.0883e-01 (9.4484e-01)\n",
      "Epoch: [18][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0232e+00 (9.4469e-01)\n",
      "Epoch: [18][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9080e-01 (9.4459e-01)\n",
      "Epoch: [18][2600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.7935e-01 (9.4450e-01)\n",
      "Epoch: [18][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4016e-01 (9.4472e-01)\n",
      "Epoch: [18][2700/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9919e-01 (9.4478e-01)\n",
      "Epoch: [18][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0304e+00 (9.4516e-01)\n",
      "Epoch: [18][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9134e-01 (9.4549e-01)\n",
      "Epoch: [18][2850/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5033e-01 (9.4524e-01)\n",
      "Epoch: [18][2900/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7817e-01 (9.4524e-01)\n",
      "Epoch: [18][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.2148e+00 (9.4513e-01)\n",
      "Epoch: [18][3000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4579e-01 (9.4496e-01)\n",
      "Epoch: [18][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0095e+00 (9.4525e-01)\n",
      "Epoch: [18][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3097e-01 (9.4544e-01)\n",
      "Epoch: [18][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1540e+00 (9.4548e-01)\n",
      "Epoch: [18][3200/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0691e-01 (9.4560e-01)\n",
      "Epoch: [18][3250/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.3462e-01 (9.4533e-01)\n",
      "Epoch: [18][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9531e-01 (9.4538e-01)\n",
      "Epoch: [18][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7111e-01 (9.4582e-01)\n",
      "Epoch: [18][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9728e-01 (9.4554e-01)\n",
      "Epoch: [18][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8482e-01 (9.4571e-01)\n",
      "Epoch: [18][3500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0253e-01 (9.4602e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [18][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8466e-01 (9.4548e-01)\n",
      "Epoch: [18][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7771e-01 (9.4552e-01)\n",
      "Epoch: [18][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5421e-01 (9.4554e-01)\n",
      "Epoch: [18][3700/5005]\tTime  0.562 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9104e-01 (9.4536e-01)\n",
      "Epoch: [18][3750/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5887e-01 (9.4512e-01)\n",
      "Epoch: [18][3800/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4017e-01 (9.4517e-01)\n",
      "Epoch: [18][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1595e+00 (9.4495e-01)\n",
      "Epoch: [18][3900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1105e+00 (9.4500e-01)\n",
      "Epoch: [18][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0718e+00 (9.4496e-01)\n",
      "Epoch: [18][4000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6237e-01 (9.4479e-01)\n",
      "Epoch: [18][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0511e-01 (9.4521e-01)\n",
      "Epoch: [18][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9609e-01 (9.4575e-01)\n",
      "Epoch: [18][4150/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8833e-01 (9.4601e-01)\n",
      "Epoch: [18][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0252e+00 (9.4594e-01)\n",
      "Epoch: [18][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6268e-01 (9.4605e-01)\n",
      "Epoch: [18][4300/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5566e-01 (9.4627e-01)\n",
      "Epoch: [18][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1694e-01 (9.4616e-01)\n",
      "Epoch: [18][4400/5005]\tTime  0.561 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9796e-01 (9.4651e-01)\n",
      "Epoch: [18][4450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1673e+00 (9.4649e-01)\n",
      "Epoch: [18][4500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5925e-01 (9.4645e-01)\n",
      "Epoch: [18][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.5318e-01 (9.4629e-01)\n",
      "Epoch: [18][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0590e-01 (9.4630e-01)\n",
      "Epoch: [18][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6721e-01 (9.4650e-01)\n",
      "Epoch: [18][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1008e+00 (9.4660e-01)\n",
      "Epoch: [18][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1543e+00 (9.4670e-01)\n",
      "Epoch: [18][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.9395e-01 (9.4662e-01)\n",
      "Epoch: [18][4850/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 6.9845e-01 (9.4679e-01)\n",
      "Epoch: [18][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7959e-01 (9.4683e-01)\n",
      "Epoch: [18][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4140e-01 (9.4674e-01)\n",
      "Epoch: [18][5000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.3201e-01 (9.4667e-01)\n",
      "Test: [  0/196]\tTime  3.292 ( 3.292)\tLoss 5.6479e-01 (5.6479e-01)\tAcc@1  85.55 ( 85.55)\tAcc@5  97.66 ( 97.66)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.434)\tLoss 5.2510e-01 (7.7921e-01)\tAcc@1  86.72 ( 79.36)\tAcc@5  96.88 ( 94.79)\n",
      "Test: [100/196]\tTime  0.378 ( 0.406)\tLoss 1.3770e+00 (9.1582e-01)\tAcc@1  64.06 ( 76.22)\tAcc@5  88.28 ( 93.41)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.2369e+00 (1.0314e+00)\tAcc@1  74.22 ( 73.90)\tAcc@5  88.28 ( 91.87)\n",
      "epoch 18 0.9466382803146979 72.9219970703125 0.0005000000000000008 2353956 0.10040116859931866\n",
      "Epoch: [19][   0/5005]\tTime  2.974 ( 2.974)\tData  2.415 ( 2.415)\tLoss 8.0634e-01 (8.0634e-01)\n",
      "Epoch: [19][  50/5005]\tTime  0.559 ( 0.607)\tData  0.000 ( 0.048)\tLoss 9.5194e-01 (9.1445e-01)\n",
      "Epoch: [19][ 100/5005]\tTime  0.559 ( 0.583)\tData  0.000 ( 0.024)\tLoss 1.0142e+00 (9.2180e-01)\n",
      "Epoch: [19][ 150/5005]\tTime  0.559 ( 0.575)\tData  0.000 ( 0.016)\tLoss 9.6906e-01 (9.2357e-01)\n",
      "Epoch: [19][ 200/5005]\tTime  0.560 ( 0.571)\tData  0.000 ( 0.012)\tLoss 1.0881e+00 (9.3349e-01)\n",
      "Epoch: [19][ 250/5005]\tTime  0.559 ( 0.569)\tData  0.000 ( 0.010)\tLoss 1.0541e+00 (9.3276e-01)\n",
      "Epoch: [19][ 300/5005]\tTime  0.559 ( 0.567)\tData  0.000 ( 0.008)\tLoss 9.3574e-01 (9.3241e-01)\n",
      "Epoch: [19][ 350/5005]\tTime  0.559 ( 0.566)\tData  0.000 ( 0.007)\tLoss 8.0441e-01 (9.3219e-01)\n",
      "Epoch: [19][ 400/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 7.9404e-01 (9.3346e-01)\n",
      "Epoch: [19][ 450/5005]\tTime  0.559 ( 0.565)\tData  0.000 ( 0.006)\tLoss 1.0100e+00 (9.3315e-01)\n",
      "Epoch: [19][ 500/5005]\tTime  0.560 ( 0.564)\tData  0.000 ( 0.005)\tLoss 9.1956e-01 (9.3296e-01)\n",
      "Epoch: [19][ 550/5005]\tTime  0.559 ( 0.564)\tData  0.000 ( 0.005)\tLoss 7.7299e-01 (9.3245e-01)\n",
      "Epoch: [19][ 600/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 7.2491e-01 (9.3111e-01)\n",
      "Epoch: [19][ 650/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 8.9464e-01 (9.3080e-01)\n",
      "Epoch: [19][ 700/5005]\tTime  0.559 ( 0.563)\tData  0.000 ( 0.004)\tLoss 9.0187e-01 (9.3069e-01)\n",
      "Epoch: [19][ 750/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 1.0018e+00 (9.3068e-01)\n",
      "Epoch: [19][ 800/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.5738e-01 (9.3123e-01)\n",
      "Epoch: [19][ 850/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 9.0753e-01 (9.3089e-01)\n",
      "Epoch: [19][ 900/5005]\tTime  0.560 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.5783e-01 (9.2980e-01)\n",
      "Epoch: [19][ 950/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 8.5828e-01 (9.3035e-01)\n",
      "Epoch: [19][1000/5005]\tTime  0.559 ( 0.562)\tData  0.000 ( 0.003)\tLoss 7.9857e-01 (9.3049e-01)\n",
      "Epoch: [19][1050/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.7390e-01 (9.3014e-01)\n",
      "Epoch: [19][1100/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.4907e-01 (9.3025e-01)\n",
      "Epoch: [19][1150/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 7.7902e-01 (9.2932e-01)\n",
      "Epoch: [19][1200/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.1356e-01 (9.2893e-01)\n",
      "Epoch: [19][1250/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0853e+00 (9.2934e-01)\n",
      "Epoch: [19][1300/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.5430e-01 (9.2941e-01)\n",
      "Epoch: [19][1350/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.9159e-01 (9.2976e-01)\n",
      "Epoch: [19][1400/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.7486e-01 (9.2965e-01)\n",
      "Epoch: [19][1450/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.3203e-01 (9.2915e-01)\n",
      "Epoch: [19][1500/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.1195e-01 (9.2856e-01)\n",
      "Epoch: [19][1550/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 1.0853e+00 (9.2844e-01)\n",
      "Epoch: [19][1600/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 8.7451e-01 (9.2860e-01)\n",
      "Epoch: [19][1650/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.9527e-01 (9.2915e-01)\n",
      "Epoch: [19][1700/5005]\tTime  0.559 ( 0.561)\tData  0.000 ( 0.002)\tLoss 9.6958e-01 (9.2924e-01)\n",
      "Epoch: [19][1750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.002)\tLoss 9.6560e-01 (9.2845e-01)\n",
      "Epoch: [19][1800/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.002)\tLoss 1.0073e+00 (9.2849e-01)\n",
      "Epoch: [19][1850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.8792e-01 (9.2806e-01)\n",
      "Epoch: [19][1900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0670e+00 (9.2801e-01)\n",
      "Epoch: [19][1950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7779e-01 (9.2725e-01)\n",
      "Epoch: [19][2000/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7441e-01 (9.2759e-01)\n",
      "Epoch: [19][2050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4241e-01 (9.2806e-01)\n",
      "Epoch: [19][2100/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3842e-01 (9.2746e-01)\n",
      "Epoch: [19][2150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0113e+00 (9.2734e-01)\n",
      "Epoch: [19][2200/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8293e-01 (9.2763e-01)\n",
      "Epoch: [19][2250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8089e-01 (9.2729e-01)\n",
      "Epoch: [19][2300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0707e+00 (9.2725e-01)\n",
      "Epoch: [19][2350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9687e-01 (9.2718e-01)\n",
      "Epoch: [19][2400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0917e+00 (9.2712e-01)\n",
      "Epoch: [19][2450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6647e-01 (9.2684e-01)\n",
      "Epoch: [19][2500/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8400e-01 (9.2718e-01)\n",
      "Epoch: [19][2550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9564e-01 (9.2684e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [19][2600/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8835e-01 (9.2693e-01)\n",
      "Epoch: [19][2650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2079e-01 (9.2709e-01)\n",
      "Epoch: [19][2700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0275e-01 (9.2700e-01)\n",
      "Epoch: [19][2750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0001e+00 (9.2702e-01)\n",
      "Epoch: [19][2800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0702e+00 (9.2680e-01)\n",
      "Epoch: [19][2850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0462e+00 (9.2708e-01)\n",
      "Epoch: [19][2900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.1197e-01 (9.2731e-01)\n",
      "Epoch: [19][2950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0258e+00 (9.2742e-01)\n",
      "Epoch: [19][3000/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1706e-01 (9.2714e-01)\n",
      "Epoch: [19][3050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0082e+00 (9.2714e-01)\n",
      "Epoch: [19][3100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8857e-01 (9.2691e-01)\n",
      "Epoch: [19][3150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7328e-01 (9.2665e-01)\n",
      "Epoch: [19][3200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9254e-01 (9.2660e-01)\n",
      "Epoch: [19][3250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9167e-01 (9.2669e-01)\n",
      "Epoch: [19][3300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3104e-01 (9.2694e-01)\n",
      "Epoch: [19][3350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7185e-01 (9.2687e-01)\n",
      "Epoch: [19][3400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8001e-01 (9.2651e-01)\n",
      "Epoch: [19][3450/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.7369e-01 (9.2629e-01)\n",
      "Epoch: [19][3500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.1178e-01 (9.2626e-01)\n",
      "Epoch: [19][3550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.2349e-01 (9.2632e-01)\n",
      "Epoch: [19][3600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.9834e-01 (9.2598e-01)\n",
      "Epoch: [19][3650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0747e+00 (9.2581e-01)\n",
      "Epoch: [19][3700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.2537e-01 (9.2575e-01)\n",
      "Epoch: [19][3750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0447e-01 (9.2591e-01)\n",
      "Epoch: [19][3800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7462e-01 (9.2576e-01)\n",
      "Epoch: [19][3850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5574e-01 (9.2553e-01)\n",
      "Epoch: [19][3900/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.4373e-01 (9.2558e-01)\n",
      "Epoch: [19][3950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9655e-01 (9.2553e-01)\n",
      "Epoch: [19][4000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3472e-01 (9.2552e-01)\n",
      "Epoch: [19][4050/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5333e-01 (9.2568e-01)\n",
      "Epoch: [19][4100/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.0439e-01 (9.2604e-01)\n",
      "Epoch: [19][4150/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6808e-01 (9.2630e-01)\n",
      "Epoch: [19][4200/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0568e+00 (9.2639e-01)\n",
      "Epoch: [19][4250/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0140e+00 (9.2611e-01)\n",
      "Epoch: [19][4300/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.8072e-01 (9.2618e-01)\n",
      "Epoch: [19][4350/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.9629e-01 (9.2623e-01)\n",
      "Epoch: [19][4400/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3362e-01 (9.2614e-01)\n",
      "Epoch: [19][4450/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.5545e-01 (9.2601e-01)\n",
      "Epoch: [19][4500/5005]\tTime  0.560 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.3460e-01 (9.2576e-01)\n",
      "Epoch: [19][4550/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.1417e+00 (9.2593e-01)\n",
      "Epoch: [19][4600/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.6826e-01 (9.2611e-01)\n",
      "Epoch: [19][4650/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6479e-01 (9.2633e-01)\n",
      "Epoch: [19][4700/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8549e-01 (9.2651e-01)\n",
      "Epoch: [19][4750/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.8403e-01 (9.2651e-01)\n",
      "Epoch: [19][4800/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 9.6615e-01 (9.2684e-01)\n",
      "Epoch: [19][4850/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.7778e-01 (9.2684e-01)\n",
      "Epoch: [19][4900/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 8.4733e-01 (9.2663e-01)\n",
      "Epoch: [19][4950/5005]\tTime  0.559 ( 0.560)\tData  0.000 ( 0.001)\tLoss 1.0555e+00 (9.2664e-01)\n",
      "Epoch: [19][5000/5005]\tTime  0.558 ( 0.560)\tData  0.000 ( 0.001)\tLoss 7.7647e-01 (9.2659e-01)\n",
      "Test: [  0/196]\tTime  3.354 ( 3.354)\tLoss 5.5974e-01 (5.5974e-01)\tAcc@1  84.38 ( 84.38)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.377 ( 0.435)\tLoss 5.1127e-01 (7.6786e-01)\tAcc@1  88.67 ( 79.70)\tAcc@5  96.48 ( 94.82)\n",
      "Test: [100/196]\tTime  0.377 ( 0.406)\tLoss 1.4213e+00 (9.0062e-01)\tAcc@1  61.72 ( 76.48)\tAcc@5  87.50 ( 93.48)\n",
      "Test: [150/196]\tTime  0.378 ( 0.397)\tLoss 1.1787e+00 (1.0175e+00)\tAcc@1  74.61 ( 74.20)\tAcc@5  88.67 ( 91.99)\n",
      "epoch 19 0.9265935722247776 73.3219985961914 0.0 2353956 0.10040116859931866\n",
      "acc 73.3219985961914\n"
     ]
    }
   ],
   "source": [
    "def get_res(epochs=20):\n",
    "    from torchvision.models import resnet50\n",
    "    model = resnet50(pretrained=True)\n",
    "    #model = resnet20()\n",
    "    #model.load_state_dict(torch.load(\"done_rn20_%s_%s_160_amp_fixed.pth\" % (seed, width)))\n",
    "    print(model, file=sys.stderr)\n",
    "    model.cuda()\n",
    "    \n",
    "    \n",
    "    #optimizer = torch.optim.AdamW(model.parameters(), 0.001)\n",
    "    optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum=0.9, nesterov=True, weight_decay=1e-4)\n",
    "    opt0 = torch.optim.SGD(model.parameters(), 0.0, momentum=0.9, nesterov=True, weight_decay=1e-4)\n",
    "    #scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2], gamma=0.1)\n",
    "    scheduler = torch.optim.lr_scheduler.PolynomialLR(optimizer, total_iters=epochs, power=1)\n",
    "    #scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, 0.004, epochs, cycle_momentum=False)\n",
    "    criterion = nn.CrossEntropyLoss()#SoftTargetCrossEntropy()\n",
    "    criterion_val = nn.CrossEntropyLoss()\n",
    "    scaler = torch.cuda.amp.GradScaler()\n",
    "    \n",
    "    total_params = 0\n",
    "    for n, m in model.named_modules():\n",
    "        if (\"conv\" in n or \"downsample\" in n) and \"Conv\" in str(type(m)) and m.weight.shape[1] > 3:\n",
    "            print(n, m.weight.shape)\n",
    "            total_params += m.weight.numel()\n",
    "            prune.l1_unstructured(m, name='weight', amount=sparsity)\n",
    "    print(\"tot\", total_params)\n",
    "    \n",
    "    best_acc1 = 0\n",
    "    acc1 = validate(val_loader, model, criterion_val).item()\n",
    "    print(\"start acc no bn\", acc1)\n",
    "    train_loss = train(train_loader, model, criterion, opt0, scaler, -1)\n",
    "    acc1 = validate(val_loader, model, criterion_val).item()\n",
    "    total_active = 0\n",
    "    for n, m in model.named_modules():\n",
    "        if \"conv\" in n or \"downsample\" in n and \"Conv\" in str(type(m)) and m.weight.shape[1] > 3:\n",
    "            total_active += (m.weight != 0).sum().item()\n",
    "    print(\"start acc bn\", acc1, total_active, total_active / total_params)\n",
    "\n",
    "    for epoch in range(epochs):\n",
    "        train_loss = train(train_loader, model, criterion, optimizer, scaler, epoch)\n",
    "        acc1 = validate(val_loader, model, criterion_val).item()\n",
    "        scheduler.step()\n",
    "        \n",
    "        # remember best acc@1 and save checkpoint\n",
    "        is_best = acc1 > best_acc1\n",
    "        best_acc1 = max(acc1, best_acc1)\n",
    "        total_active = 0\n",
    "        for n, m in model.named_modules():\n",
    "            if \"conv\" in n or \"downsample\" in n and \"Conv\" in str(type(m)) and m.weight.shape[1] > 3:\n",
    "                total_active += (m.weight != 0).sum().item()\n",
    "        print(\"epoch\", epoch, train_loss, acc1, optimizer.param_groups[0]['lr'], total_active, total_active / total_params)\n",
    "    \n",
    "    return acc1, copy.deepcopy(model.state_dict())\n",
    "\n",
    "acc, end = get_res()\n",
    "\n",
    "print(\"acc\", acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a063e74c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
