{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f553ec87",
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "seed = 10\n",
    "sparsity = 0.9\n",
    "width = 32"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6edfc04b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fda18cc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import os\n",
    "import torchvision.transforms as transforms\n",
    "import torchvision.datasets as datasets\n",
    "import time\n",
    "import copy\n",
    "import sys\n",
    "\n",
    "import random\n",
    "import numpy as np\n",
    "import torch\n",
    "from sklearn.decomposition import PCA\n",
    "import matplotlib.pyplot as plt\n",
    "import scipy.stats as ss\n",
    "from timm.data import Mixup\n",
    "from timm.loss import SoftTargetCrossEntropy\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.init as init\n",
    "import torch.nn.functional as F\n",
    "from torch.autograd import Variable\n",
    "\n",
    "import sys\n",
    "import numpy as np\n",
    "import torch.nn.utils.prune as prune\n",
    "from datautils import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c9b3aabc",
   "metadata": {},
   "outputs": [],
   "source": [
    "def random_seed(seed=42, rank=0):\n",
    "    torch.manual_seed(seed + rank)\n",
    "    np.random.seed(seed + rank)\n",
    "    random.seed(seed + rank)\n",
    "\n",
    "random_seed(47)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2348c12a",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_loader, val_loader = get_loaders(\n",
    "    \"imagenet\", path=\"\",\n",
    "    batchsize=256, workers=8,\n",
    "    nsamples=-1, seed=0,\n",
    "    noaug=False\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c50e599a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def train(train_loader, model, criterion, optimizer, scaler, epoch):\n",
    "    batch_time = AverageMeter('Time', ':6.3f')\n",
    "    data_time = AverageMeter('Data', ':6.3f')\n",
    "    losses = AverageMeter('Loss', ':.4e')\n",
    "    #top1 = AverageMeter('Acc@1', ':6.2f')\n",
    "    #top5 = AverageMeter('Acc@5', ':6.2f')\n",
    "    progress = ProgressMeter(\n",
    "        len(train_loader),\n",
    "        [batch_time, data_time, losses],\n",
    "        prefix=\"Epoch: [{}]\".format(epoch))\n",
    "\n",
    "    # switch to train mode\n",
    "    model.train()\n",
    "\n",
    "    end = time.time()\n",
    "    for i, (images, target) in enumerate(train_loader):\n",
    "        # measure data loading time\n",
    "        data_time.update(time.time() - end)\n",
    "        images = images.cuda(non_blocking=True)\n",
    "        target = target.cuda(non_blocking=True)\n",
    "\n",
    "        # compute output\n",
    "        with torch.cuda.amp.autocast(enabled=True):\n",
    "            output = model(images)\n",
    "            loss = criterion(output, target)\n",
    "\n",
    "        # measure accuracy and record loss\n",
    "        #acc1, acc5 = accuracy(output, target, topk=(1, 5))\n",
    "        losses.update(loss.item(), images.size(0))\n",
    "        #top1.update(acc1[0], images.size(0))\n",
    "        #top5.update(acc5[0], images.size(0))\n",
    "\n",
    "        # compute gradient and do SGD step\n",
    "        optimizer.zero_grad()\n",
    "        scaler.scale(loss).backward()\n",
    "        scaler.step(optimizer)\n",
    "        scaler.update()\n",
    "        #loss.backward()\n",
    "        #optimizer.step()\n",
    "\n",
    "        # measure elapsed time\n",
    "        batch_time.update(time.time() - end)\n",
    "        end = time.time()\n",
    "\n",
    "        if i % 50 == 0:\n",
    "            progress.display(i)\n",
    "        if epoch == -1 and i == 50:\n",
    "            break\n",
    "\n",
    "    return losses.avg\n",
    "\n",
    "\n",
    "def validate(val_loader, model, criterion):\n",
    "    batch_time = AverageMeter('Time', ':6.3f')\n",
    "    losses = AverageMeter('Loss', ':.4e')\n",
    "    top1 = AverageMeter('Acc@1', ':6.2f')\n",
    "    top5 = AverageMeter('Acc@5', ':6.2f')\n",
    "    progress = ProgressMeter(\n",
    "        len(val_loader),\n",
    "        [batch_time, losses, top1, top5],\n",
    "        prefix='Test: ')\n",
    "\n",
    "    # switch to evaluate mode\n",
    "    model.eval()\n",
    "\n",
    "    with torch.no_grad():\n",
    "        end = time.time()\n",
    "        for i, (images, target) in enumerate(val_loader):\n",
    "            images = images.cuda(non_blocking=True)\n",
    "            target = target.cuda(non_blocking=True)\n",
    "\n",
    "            # compute output\n",
    "            output = model(images)\n",
    "            loss = criterion(output, target)\n",
    "\n",
    "            # measure accuracy and record loss\n",
    "            acc1, acc5 = accuracy(output, target, topk=(1, 5))\n",
    "            losses.update(loss.item(), images.size(0))\n",
    "            top1.update(acc1[0], images.size(0))\n",
    "            top5.update(acc5[0], images.size(0))\n",
    "\n",
    "            # measure elapsed time\n",
    "            batch_time.update(time.time() - end)\n",
    "            end = time.time()\n",
    "\n",
    "            if i % 50 == 0:\n",
    "                progress.display(i)\n",
    "\n",
    "        # TODO: this should also be done with the ProgressMeter\n",
    "        #print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'\n",
    "        #      .format(top1=top1, top5=top5))\n",
    "\n",
    "    return top1.avg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "698abd39",
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):\n",
    "    torch.save(state, filename)\n",
    "    if is_best:\n",
    "        shutil.copyfile(filename, 'model_best.pth.tar')\n",
    "\n",
    "\n",
    "class AverageMeter(object):\n",
    "    \"\"\"Computes and stores the average and current value\"\"\"\n",
    "    def __init__(self, name, fmt=':f'):\n",
    "        self.name = name\n",
    "        self.fmt = fmt\n",
    "        self.reset()\n",
    "\n",
    "    def reset(self):\n",
    "        self.val = 0\n",
    "        self.avg = 0\n",
    "        self.sum = 0\n",
    "        self.count = 0\n",
    "\n",
    "    def update(self, val, n=1):\n",
    "        self.val = val\n",
    "        self.sum += val * n\n",
    "        self.count += n\n",
    "        self.avg = self.sum / self.count\n",
    "\n",
    "    def __str__(self):\n",
    "        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'\n",
    "        return fmtstr.format(**self.__dict__)\n",
    "\n",
    "\n",
    "class ProgressMeter(object):\n",
    "    def __init__(self, num_batches, meters, prefix=\"\"):\n",
    "        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)\n",
    "        self.meters = meters\n",
    "        self.prefix = prefix\n",
    "\n",
    "    def display(self, batch):\n",
    "        entries = [self.prefix + self.batch_fmtstr.format(batch)]\n",
    "        entries += [str(meter) for meter in self.meters]\n",
    "        print('\\t'.join(entries))\n",
    "\n",
    "    def _get_batch_fmtstr(self, num_batches):\n",
    "        num_digits = len(str(num_batches // 1))\n",
    "        fmt = '{:' + str(num_digits) + 'd}'\n",
    "        return '[' + fmt + '/' + fmt.format(num_batches) + ']'\n",
    "\n",
    "\n",
    "def adjust_learning_rate(optimizer, epoch):\n",
    "    \"\"\"Sets the learning rate to the initial LR decayed by 10 every 30 epochs\"\"\"\n",
    "    lr = LR * (0.1 ** (epoch // 30))\n",
    "    for param_group in optimizer.param_groups:\n",
    "        param_group['lr'] = lr\n",
    "\n",
    "\n",
    "def accuracy(output, target, topk=(1,)):\n",
    "    \"\"\"Computes the accuracy over the k top predictions for the specified values of k\"\"\"\n",
    "    with torch.no_grad():\n",
    "        maxk = max(topk)\n",
    "        batch_size = target.size(0)\n",
    "\n",
    "        _, pred = output.topk(maxk, 1, True, True)\n",
    "        pred = pred.t()\n",
    "        correct = pred.eq(target.view(1, -1).expand_as(pred))\n",
    "\n",
    "        res = []\n",
    "        for k in topk:\n",
    "            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)\n",
    "            res.append(correct_k.mul_(100.0 / batch_size))\n",
    "        return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "76d49f44",
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_other2(A, W, nnz, Z, U, print_sc=None, debug=False, reg=0, rho_start=0.03, iters=5, prune_iters=2):\n",
    "    XX = A.T.matmul(A)\n",
    "    norm2 = torch.diag(XX).sqrt() + 1e-8\n",
    "    An = A / norm2\n",
    "    XX = An.T.matmul(An)\n",
    "    XX += torch.diag(torch.ones_like(XX.diag())) * XX.diag().mean() * reg\n",
    "    \n",
    "    #norm2 = torch.ones_like(norm2)\n",
    "    Wnn = W# * norm2.unsqueeze(1)\n",
    "    rho = 1\n",
    "    XY = An.T.matmul(Wnn)\n",
    "    XXinv = torch.inverse(XX + torch.eye(XX.shape[1], device=XX.device)*rho)\n",
    "    XXinv2 = torch.inverse(XX + torch.eye(XX.shape[1], device=XX.device)*rho_start)\n",
    "    U = U * norm2.unsqueeze(1)\n",
    "    Z = Z * norm2.unsqueeze(1)\n",
    "    \n",
    "    #B = torch.linalg.solve(XX, XY)\n",
    "    B = XXinv2.matmul(XY + rho_start*(Z-U))\n",
    "    \n",
    "    #U = torch.zeros_like(B)\n",
    "    \n",
    "    #Z = B\n",
    "    \n",
    "    bsparsity = min(0.99, 1 - nnz/B.numel())\n",
    "    #print(\"bs\", bsparsity)\n",
    "\n",
    "\n",
    "    for itt in range(iters):\n",
    "        if itt < prune_iters:\n",
    "            cur_sparsity = bsparsity# - bsparsity * (1 - (itt + 1) / iterative_prune) ** 3\n",
    "            thres = (B+U).abs().flatten().sort()[0][int(B.numel() * cur_sparsity)]\n",
    "            mask = ((B+U).abs() > thres)\n",
    "            del thres\n",
    "\n",
    "        Z = (B + U) * mask    \n",
    "\n",
    "        U = U + (B - Z)    \n",
    "\n",
    "        B = XXinv.matmul(XY + rho*(Z-U))\n",
    "        #B = torch.linalg.solve(XX + torch.eye(XX.shape[1], device=XX.device)*rho, XY + rho*(Z-U))\n",
    "        if debug:\n",
    "            print(itt, cur_sparsity, (Z != 0).sum().item() / Z.numel())\n",
    "            print_sc(A.matmul(B / norm2.unsqueeze(1)))\n",
    "            print_sc(A.matmul(Z / norm2.unsqueeze(1)))\n",
    "            print(((An != 0).sum() + (Z != 0).sum()) / W.numel())\n",
    "            print(\"-------\")\n",
    "    if debug:\n",
    "        print(\"opt end\")\n",
    "\n",
    "    return Z / norm2.unsqueeze(1), U / norm2.unsqueeze(1)    \n",
    "    \n",
    "def mag_prune(W, sp=0.6):\n",
    "    thres = (W).abs().flatten().sort()[0][int(W.numel() * sp)]\n",
    "    mask = ((W).abs() > thres)\n",
    "    return W * mask\n",
    "\n",
    "def ent(p):\n",
    "    return -(p * np.log2(p) + (1-p) * np.log2(1-p))\n",
    "\n",
    "def factorizeT(W, XX, asp=0.16, sp=0.4, iters=40):\n",
    "    #W = lx.weight.detach().T.float()\n",
    "    nza = int(W.shape[0]**2 * asp)\n",
    "    nzb = int(W.numel() * sp - nza)\n",
    "    \n",
    "    Az = torch.eye(W.shape[0], device=W.device)\n",
    "    Au = torch.zeros_like(Az)\n",
    "    norm = XX.diag().sqrt().unsqueeze(1) + 1e-8\n",
    "    norm = torch.ones_like(norm)\n",
    "       \n",
    "    Wn = W * norm\n",
    "       \n",
    "    print(\"nz\", nza, nzb, Wn.shape)\n",
    "    Bz = mag_prune(Wn, (1 - nzb/W.numel()))\n",
    "    Bu = torch.zeros_like(Bz)\n",
    "    \n",
    "    for itt in range(iters):\n",
    "        #if itt < 10:\n",
    "        #    rho_start = 0.0\n",
    "        #elif itt < 15:\n",
    "        #    rho_start = 0.00\n",
    "        #else:\n",
    "        #    rho_start = 0.1\n",
    "        rho_start = min(1.0, itt / (iters-3))**3\n",
    "        Az, Au = (x.T for x in find_other2(Bz.T, Wn.T, nza, Az.T, Au.T, reg=1e-2, debug=False, rho_start=rho_start))\n",
    "                \n",
    "        Bz, Bu = find_other2(Az, Wn, nzb, Bz, Bu, reg=1e-2, debug=False, rho_start=rho_start)\n",
    "    \n",
    "    #print(((Az != 0).sum() + (Bz != 0).sum()).item() / W.numel(), (Az != 0).sum().item() / Az.numel(),\n",
    "    #      (Bz != 0).sum().item() / Bz.numel(), Az.shape, Bz.shape,\n",
    "    #     (Az.numel()*ent((Az != 0).sum().item() / Az.numel()) + Bz.numel()*ent((Bz != 0).sum().item() / Bz.numel())) / W.numel(), \n",
    "    #    ent(0.4), ent(0.5))\n",
    "    return ((Az / norm).matmul(Bz)).T, Bz.T, (Az / norm).T\n",
    "\n",
    "\n",
    "def factorizef(W, XX, asp=0.16, sp=0.4, iters=200, l_prev=None):\n",
    "    s_time = time.time()\n",
    "    if W.shape[0] >= W.shape[1]:\n",
    "        return factorizeT(W.T, XX, sp=sp, asp=asp, iters=iters)\n",
    "    \n",
    "    nza = int(W.shape[0]**2 * asp)\n",
    "    nzb = int(W.numel() * sp - nza)\n",
    "    norm = XX.diag().sqrt() + 1e-8\n",
    "    norm = torch.ones_like(norm)\n",
    "\n",
    "    Wn = W * norm\n",
    "    \n",
    "    Az = torch.eye(W.shape[0], device=W.device)\n",
    "    Au = torch.zeros_like(Az)\n",
    "\n",
    "    print(\"nz\", nza, nzb, Wn.shape)\n",
    "    Bz = mag_prune(Wn, (1 - nzb/W.numel()))\n",
    "    Bu = torch.zeros_like(Bz)\n",
    "    \n",
    "    for itt in range(iters):\n",
    "        #if itt < 10:\n",
    "        #    rho_start = 0.0\n",
    "        #elif itt < 15:\n",
    "        #    rho_start = 0.00\n",
    "        #else:\n",
    "        #    rho_start = 0.1\n",
    "            \n",
    "        rho_start = min(1.0, itt / (iters-3))**3\n",
    "        Az, Au = (x.T for x in find_other2(Bz.T, Wn.T, nza, Az.T, Au.T, reg=1e-2, debug=False, rho_start=rho_start))\n",
    "                \n",
    "        Bz, Bu = find_other2(Az, Wn, nzb, Bz, Bu, reg=1e-2, debug=False, rho_start=rho_start)\n",
    "        \n",
    "        #print(itt, time.time() - s_time, end =\" \") \n",
    "        #print_scores(Az.matmul(Bz / norm))\n",
    "        \n",
    "        \n",
    "    #print(((Az != 0).sum() + (Bz != 0).sum()).item() / W.numel(), (Az != 0).sum().item() / Az.numel(),\n",
    "    #      (Bz != 0).sum().item() / Bz.numel(), Az.shape, Bz.shape,\n",
    "    #     (Az.numel()*ent((Az != 0).sum().item() / Az.numel()) + Bz.numel()*ent((Bz != 0).sum().item() / Bz.numel())) / W.numel(), \n",
    "    #    ent(0.4), ent(0.5))\n",
    "    return Az.matmul(Bz / norm), Az, Bz / norm\n",
    "\n",
    "def factorize(XX, W, sp, l_prev=None):\n",
    "    W = W.detach().float()\n",
    "    asp = max(0.05, sp/2)\n",
    "    W2, Ab, Bb = factorizef(W, XX, sp=sp, asp=asp, l_prev=l_prev)\n",
    "    An = Ab.norm(dim=0) + 1e-12\n",
    "    Bn = Bb.norm(dim=1) + 1e-12\n",
    "    #print(An, Bn)\n",
    "    Ab *= (Bn/An).sqrt()\n",
    "    Bb *= (An/Bn).sqrt().unsqueeze(1)\n",
    "    #print(Ab.norm(dim=0), Bb.norm(dim=1))\n",
    "    W2 = Ab.matmul(Bb)\n",
    "    print(\"err_prefin\", (W2 - W).matmul(XX).matmul((W2 - W).T).diag().sum().item(), W.abs().amax().item(), Ab.abs().amax().item(), Bb.abs().amax().item())\n",
    "    #qq = qqqq\n",
    "    print(\"sparsity check\", ((Ab != 0).sum() + (Bb != 0).sum()).item() / W2.numel())\n",
    "    return W2, (Ab, Bb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "d1aab8b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def hook(m, *args, **kwargs):\n",
    "    m.weight = m.wo * m.mask\n",
    "\n",
    "def add_mask(m):\n",
    "    m.register_parameter(\"wo\", m.weight)\n",
    "    m.register_buffer(\"mask\", torch.nn.Parameter((m.weight.data != 0).to(torch.float32)))\n",
    "    del m._parameters[\"weight\"]\n",
    "    m.register_forward_pre_hook(hook)\n",
    "\n",
    "\n",
    "def run_dsp(model):\n",
    "    out_admm = {}\n",
    "    for n, m in model.named_modules():\n",
    "        if type(m) == nn.Conv2d and m.weight.shape[1] > 3:\n",
    "            density = 1 - sparsity\n",
    "            w_orig = m.weight.flatten(1)\n",
    "            w_mag = mag_prune(w_orig, sparsity)\n",
    "            w_admm, facts = factorize(torch.eye(w_orig.shape[1], device=w_orig.device), w_orig, density)\n",
    "            out_admm[n] = (w_admm.reshape(w_orig.shape), facts)\n",
    "            print(n, (w_admm - w_orig).square().sum().item(), (w_mag - w_orig).square().sum().item(), w_orig.square().sum().item())\n",
    "            #m.XX = None\n",
    "\n",
    "    for n, m in model.named_modules():\n",
    "        if n in out_admm:\n",
    "            print(\"change\", n)\n",
    "            m.weight.data = out_admm[n][0].reshape(m.weight.shape)\n",
    "            m.weight.facts = out_admm[n][1]\n",
    "    \n",
    "    for n, m in model.named_modules():\n",
    "        if \"Bottleneck\" in str(type(m)):\n",
    "            print(m.conv1.weight.shape, m.conv1.weight.facts[0].shape, m.conv1.weight.facts[1].shape)\n",
    "            if True:\n",
    "                ff = m.conv1.weight.facts\n",
    "                m.conv1b = m.conv1\n",
    "                m.conv1 = nn.Sequential(\n",
    "                    nn.Conv2d(m.conv1b.in_channels, m.conv1b.out_channels, 1, bias=False),\n",
    "                    nn.Conv2d(m.conv1b.out_channels, m.conv1b.out_channels, 1, bias=False)\n",
    "                )\n",
    "                m.conv1[0].weight.data = ff[1].reshape(m.conv1[0].weight.shape)\n",
    "                m.conv1[1].weight.data = ff[0].reshape(m.conv1[1].weight.shape)\n",
    "                m.conv1.cuda()\n",
    "                add_mask(m.conv1[0])\n",
    "                add_mask(m.conv1[1])\n",
    "                \n",
    "            print(m.conv2.weight.shape, m.conv2.weight.facts[0].shape, m.conv2.weight.facts[1].shape)\n",
    "            \n",
    "            if True:\n",
    "                ff = m.conv2.weight.facts\n",
    "                m.conv2b = m.conv2\n",
    "                m.conv2 = nn.Sequential(\n",
    "                    nn.Conv2d(m.conv2b.in_channels, m.conv2b.out_channels, 3, padding=1, stride=m.conv2b.stride, bias=False),\n",
    "                    nn.Conv2d(m.conv2b.out_channels, m.conv2b.out_channels, 1, bias=False)\n",
    "                )\n",
    "                #m.conv2[0].register_forward_hook(boo)\n",
    "                m.conv2[0].weight.data = ff[1].reshape(m.conv2[0].weight.shape)\n",
    "                m.conv2[1].weight.data = ff[0].reshape(m.conv2[1].weight.shape)\n",
    "                m.conv2.cuda()\n",
    "                add_mask(m.conv2[0])\n",
    "                add_mask(m.conv2[1])\n",
    "                \n",
    "            if True:\n",
    "                ff = m.conv3.weight.facts\n",
    "                m.conv3b = m.conv3\n",
    "                m.conv3 = nn.Sequential(\n",
    "                    nn.Conv2d(m.conv3b.in_channels, m.conv3b.in_channels, 1, bias=False),\n",
    "                    nn.Conv2d(m.conv3b.in_channels, m.conv3b.out_channels, 1, bias=False)\n",
    "                )\n",
    "                m.conv3[0].weight.data = ff[1].reshape(m.conv3[0].weight.shape)\n",
    "                m.conv3[1].weight.data = ff[0].reshape(m.conv3[1].weight.shape)\n",
    "                m.conv3.cuda()\n",
    "                add_mask(m.conv3[0])\n",
    "                add_mask(m.conv3[1])\n",
    "            \n",
    "            if m.downsample is not None:\n",
    "                print(m.downsample[0].weight.shape, m.downsample[0].weight.facts[0].shape, m.downsample[0].weight.facts[1].shape)\n",
    "                m.sb = m.downsample[0]\n",
    "                ff = m.sb.weight.facts\n",
    "                m.downsample[0] = nn.Sequential(\n",
    "                    nn.Conv2d(m.sb.in_channels, m.sb.in_channels, 1, stride=m.sb.stride, bias=False),\n",
    "                    nn.Conv2d(m.sb.in_channels, m.sb.out_channels, 1, bias=False)\n",
    "                )\n",
    "                #m.conv2[0].register_forward_hook(boo)\n",
    "                m.downsample[0][0].weight.data = ff[1].reshape(m.downsample[0][0].weight.shape)\n",
    "                m.downsample[0][1].weight.data = ff[0].reshape(m.downsample[0][1].weight.shape)\n",
    "                m.downsample.cuda()\n",
    "                add_mask(m.downsample[0][0])\n",
    "                add_mask(m.downsample[0][1])\n",
    "                \n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "dba446b7",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tot 23445504\n",
      "nz 204 205 torch.Size([64, 64])\n",
      "err_prefin 3.1943302154541016 0.7266281247138977 0.8554760217666626 0.8422347903251648\n",
      "sparsity check 0.099365234375\n",
      "layer1.0.conv1 3.1943302154541016 4.395542144775391 20.62295150756836\n",
      "nz 204 3482 torch.Size([64, 576])\n",
      "err_prefin 5.441827774047852 0.46786433458328247 1.0092570781707764 0.45649296045303345\n",
      "sparsity check 0.09993489583333333\n",
      "layer1.0.conv2 5.441827774047852 7.894467353820801 31.344758987426758\n",
      "nz 204 1434 torch.Size([64, 256])\n",
      "err_prefin 3.452609062194824 0.3936349153518677 0.5334653258323669 0.8795067667961121\n",
      "sparsity check 0.099853515625\n",
      "layer1.0.conv3 3.452609062194824 5.793004035949707 20.379261016845703\n",
      "nz 204 1434 torch.Size([64, 256])\n",
      "err_prefin 7.820006847381592 0.987881064414978 0.7290602922439575 1.3375771045684814\n",
      "sparsity check 0.099853515625\n",
      "layer1.0.downsample.0 7.82000732421875 11.938996315002441 54.36511993408203\n",
      "nz 204 1434 torch.Size([64, 256])\n",
      "err_prefin 4.0182929039001465 0.2617597281932831 0.8231250047683716 0.38419216871261597\n",
      "sparsity check 0.099853515625\n",
      "layer1.1.conv1 4.0182929039001465 5.671724319458008 15.39460277557373\n",
      "nz 204 3482 torch.Size([64, 576])\n",
      "err_prefin 8.85940170288086 0.5201045870780945 0.9710637331008911 0.530194878578186\n",
      "sparsity check 0.09993489583333333\n",
      "layer1.1.conv2 8.85940170288086 11.242288589477539 30.597768783569336\n",
      "nz 204 1434 torch.Size([64, 256])\n",
      "err_prefin 3.9398086071014404 0.29462704062461853 0.409368097782135 0.9055802822113037\n",
      "sparsity check 0.099853515625\n",
      "layer1.1.conv3 3.9398086071014404 5.408066749572754 17.34823989868164\n",
      "nz 204 1434 torch.Size([64, 256])\n",
      "err_prefin 4.553450584411621 0.19206704199314117 0.6257369518280029 0.36849457025527954\n",
      "sparsity check 0.099853515625\n",
      "layer1.2.conv1 4.553450107574463 6.383799076080322 14.82396125793457\n",
      "nz 204 3482 torch.Size([64, 576])\n",
      "err_prefin 13.655452728271484 0.2855665683746338 0.7525722980499268 0.38068628311157227\n",
      "sparsity check 0.09993489583333333\n",
      "layer1.2.conv2 13.655452728271484 17.147613525390625 37.246177673339844\n",
      "nz 204 1434 torch.Size([64, 256])\n",
      "err_prefin 3.3338074684143066 0.2751551568508148 0.46889227628707886 0.8783687949180603\n",
      "sparsity check 0.099853515625\n",
      "layer1.2.conv3 3.3338074684143066 4.3794403076171875 15.938384056091309\n",
      "nz 819 2457 torch.Size([128, 256])\n",
      "err_prefin 9.605802536010742 0.3531537353992462 0.6849489212036133 0.5659644603729248\n",
      "sparsity check 0.09991455078125\n",
      "layer2.0.conv1 9.605802536010742 15.671998977661133 41.18355941772461\n",
      "nz 819 13926 torch.Size([128, 1152])\n",
      "err_prefin 24.87860870361328 0.29927510023117065 0.8672854900360107 0.34650444984436035\n",
      "sparsity check 0.09998236762152778\n",
      "layer2.0.conv2 24.87860679626465 33.07981872558594 73.3001937866211\n",
      "nz 819 5734 torch.Size([128, 512])\n",
      "err_prefin 10.585004806518555 0.3916732370853424 0.5166381001472473 0.8153484463691711\n",
      "sparsity check 0.0999603271484375\n",
      "layer2.0.conv3 10.585004806518555 15.467805862426758 51.68487548828125\n",
      "nz 3276 9831 torch.Size([256, 512])\n",
      "err_prefin 8.336135864257812 0.5662445425987244 0.49069318175315857 0.7630191445350647\n",
      "sparsity check 0.09998321533203125\n",
      "layer2.0.downsample.0 8.336135864257812 17.10980224609375 70.04782104492188\n",
      "nz 819 5734 torch.Size([128, 512])\n",
      "err_prefin 2.2226064205169678 0.25216183066368103 0.5871654748916626 0.4147474467754364\n",
      "sparsity check 0.0999603271484375\n",
      "layer2.1.conv1 2.2226064205169678 4.824129104614258 18.187597274780273\n",
      "nz 819 13926 torch.Size([128, 1152])\n",
      "err_prefin 7.620846748352051 0.2997848093509674 0.7950398325920105 0.39130106568336487\n",
      "sparsity check 0.09998236762152778\n",
      "layer2.1.conv2 7.620846748352051 15.775908470153809 55.13344192504883\n",
      "nz 819 5734 torch.Size([128, 512])\n",
      "err_prefin 3.9108235836029053 0.30379050970077515 0.44030895829200745 0.8828412294387817\n",
      "sparsity check 0.0999603271484375\n",
      "layer2.1.conv3 3.9108235836029053 6.641151428222656 31.986560821533203\n",
      "nz 819 5734 torch.Size([128, 512])\n",
      "err_prefin 8.790669441223145 0.23830968141555786 0.7148557305335999 0.3894064426422119\n",
      "sparsity check 0.0999603271484375\n",
      "layer2.2.conv1 8.790669441223145 13.02694034576416 35.66695022583008\n",
      "nz 819 13926 torch.Size([128, 1152])\n",
      "err_prefin 19.564407348632812 0.2555960714817047 0.7359195947647095 0.37041887640953064\n",
      "sparsity check 0.09998236762152778\n",
      "layer2.2.conv2 19.564407348632812 26.603708267211914 68.77576446533203\n",
      "nz 819 5734 torch.Size([128, 512])\n",
      "err_prefin 12.445109367370605 0.35215842723846436 0.4855828583240509 0.7750959992408752\n",
      "sparsity check 0.0999603271484375\n",
      "layer2.2.conv3 12.445108413696289 17.070392608642578 44.91209411621094\n",
      "nz 819 5734 torch.Size([128, 512])\n",
      "err_prefin 12.216817855834961 0.28143224120140076 0.6624945998191833 0.41919368505477905\n",
      "sparsity check 0.0999603271484375\n",
      "layer2.3.conv1 12.216815948486328 17.234355926513672 38.818328857421875\n",
      "nz 819 13926 torch.Size([128, 1152])\n",
      "err_prefin 27.037776947021484 0.2209654152393341 0.7471863627433777 0.30757081508636475\n",
      "sparsity check 0.09998236762152778\n",
      "layer2.3.conv2 27.037776947021484 34.705230712890625 74.12933349609375\n",
      "nz 819 5734 torch.Size([128, 512])\n",
      "err_prefin 10.533113479614258 0.2956201732158661 0.43666544556617737 0.7392120361328125\n",
      "sparsity check 0.0999603271484375\n",
      "layer2.3.conv3 10.533113479614258 14.172555923461914 38.846248626708984\n",
      "nz 3276 9831 torch.Size([256, 512])\n",
      "err_prefin 30.329708099365234 0.3425379693508148 0.7655584216117859 0.5242722034454346\n",
      "sparsity check 0.09998321533203125\n",
      "layer3.0.conv1 30.3297061920166 49.61932373046875 124.27618408203125\n",
      "nz 3276 55706 torch.Size([256, 2304])\n",
      "err_prefin 52.68927001953125 0.2007666528224945 0.8556164503097534 0.2871233820915222\n",
      "sparsity check 0.09999593098958333\n",
      "layer3.0.conv2 52.68927001953125 77.27236938476562 177.90823364257812\n",
      "nz 3276 22938 torch.Size([256, 1024])\n",
      "err_prefin 40.010154724121094 0.32124170660972595 0.5010008811950684 0.7703579664230347\n",
      "sparsity check 0.0999908447265625\n",
      "layer3.0.conv3 40.01015853881836 58.43788146972656 143.28042602539062\n",
      "nz 13107 39321 torch.Size([512, 1024])\n",
      "err_prefin 27.8060245513916 0.34601572155952454 0.4393579959869385 0.5857852101325989\n",
      "sparsity check 0.09999465942382812\n",
      "layer3.0.downsample.0 27.806026458740234 49.90775680541992 136.73268127441406\n",
      "nz 3276 22938 torch.Size([256, 1024])\n",
      "err_prefin 14.941981315612793 0.29417240619659424 0.6579937934875488 0.45383161306381226\n",
      "sparsity check 0.0999908447265625\n",
      "layer3.1.conv1 14.94198226928711 23.940704345703125 60.75559616088867\n",
      "nz 3276 55706 torch.Size([256, 2304])\n",
      "err_prefin 38.162925720214844 0.2634257674217224 0.8233993053436279 0.3736192286014557\n",
      "sparsity check 0.09999593098958333\n",
      "layer3.1.conv2 38.162925720214844 56.6761474609375 134.50469970703125\n",
      "nz 3276 22938 torch.Size([256, 1024])\n",
      "err_prefin 31.919002532958984 0.4968879222869873 0.5041258335113525 0.8976971507072449\n",
      "sparsity check 0.0999908447265625\n",
      "layer3.1.conv3 31.919002532958984 44.23591995239258 108.93757629394531\n",
      "nz 3276 22938 torch.Size([256, 1024])\n",
      "err_prefin 17.081296920776367 0.2714691460132599 0.7070178389549255 0.47314751148223877\n",
      "sparsity check 0.0999908447265625\n",
      "layer3.2.conv1 17.081298828125 26.04974937438965 65.09668731689453\n",
      "nz 3276 55706 torch.Size([256, 2304])\n",
      "err_prefin 43.14348220825195 0.21002456545829773 0.8537011742591858 0.2743801176548004\n",
      "sparsity check 0.09999593098958333\n",
      "layer3.2.conv2 43.14348220825195 61.19694519042969 133.0740509033203\n",
      "nz 3276 22938 torch.Size([256, 1024])\n",
      "err_prefin 28.40374755859375 0.3537616431713104 0.4754135310649872 0.7067458033561707\n",
      "sparsity check 0.0999908447265625\n",
      "layer3.2.conv3 28.40374755859375 40.70864486694336 95.56613159179688\n",
      "nz 3276 22938 torch.Size([256, 1024])\n",
      "err_prefin 25.144611358642578 0.2390037477016449 0.7076114416122437 0.43214547634124756\n",
      "sparsity check 0.0999908447265625\n",
      "layer3.3.conv1 25.144611358642578 34.94697952270508 79.00544738769531\n",
      "nz 3276 55706 torch.Size([256, 2304])\n",
      "err_prefin 48.707763671875 0.27921295166015625 0.8285293579101562 0.38120391964912415\n",
      "sparsity check 0.09999593098958333\n",
      "layer3.3.conv2 48.707767486572266 64.19059753417969 131.74488830566406\n",
      "nz 3276 22938 torch.Size([256, 1024])\n",
      "err_prefin 27.961111068725586 0.3126426041126251 0.44533678889274597 0.7298555374145508\n",
      "sparsity check 0.0999908447265625\n",
      "layer3.3.conv3 27.961111068725586 38.3577766418457 87.29132843017578\n",
      "nz 3276 22938 torch.Size([256, 1024])\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "err_prefin 29.67755126953125 0.2721982002258301 0.6721134781837463 0.433461457490921\n",
      "sparsity check 0.0999908447265625\n",
      "layer3.4.conv1 29.677553176879883 39.984375 86.25032043457031\n",
      "nz 3276 55706 torch.Size([256, 2304])\n",
      "err_prefin 48.958839416503906 0.19188867509365082 0.719207227230072 0.2540438175201416\n",
      "sparsity check 0.09999593098958333\n",
      "layer3.4.conv2 48.958839416503906 64.2547607421875 130.9825439453125\n",
      "nz 3276 22938 torch.Size([256, 1024])\n",
      "err_prefin 27.120649337768555 0.316133052110672 0.37045490741729736 0.7376529574394226\n",
      "sparsity check 0.0999908447265625\n",
      "layer3.4.conv3 27.120647430419922 37.8643798828125 87.05435180664062\n",
      "nz 3276 22938 torch.Size([256, 1024])\n",
      "err_prefin 37.631561279296875 0.39949774742126465 0.7623628973960876 0.5068808197975159\n",
      "sparsity check 0.0999908447265625\n",
      "layer3.5.conv1 37.631561279296875 48.90454864501953 102.01531982421875\n",
      "nz 3276 55706 torch.Size([256, 2304])\n",
      "err_prefin 50.715980529785156 0.2235630750656128 0.7789952754974365 0.2990630865097046\n",
      "sparsity check 0.09999593098958333\n",
      "layer3.5.conv2 50.715980529785156 67.7354736328125 137.49560546875\n",
      "nz 3276 22938 torch.Size([256, 1024])\n",
      "err_prefin 31.903594970703125 0.32883593440055847 0.517221987247467 0.9113131761550903\n",
      "sparsity check 0.0999908447265625\n",
      "layer3.5.conv3 31.90359878540039 44.63056945800781 98.38701629638672\n",
      "nz 13107 39321 torch.Size([512, 1024])\n",
      "err_prefin 99.4652099609375 0.3415152430534363 0.8244891166687012 0.4512406289577484\n",
      "sparsity check 0.09999465942382812\n",
      "layer4.0.conv1 99.4652099609375 149.02874755859375 297.8857116699219\n",
      "nz 13107 222822 torch.Size([512, 4608])\n",
      "err_prefin 131.0057373046875 0.3992723524570465 0.9301921725273132 0.4265134334564209\n",
      "sparsity check 0.09999889797634548\n",
      "layer4.0.conv2 131.00572204589844 194.92591857910156 372.49163818359375\n",
      "nz 13107 91750 torch.Size([512, 2048])\n",
      "err_prefin 90.66539001464844 0.3546474874019623 0.4020810127258301 0.8380321264266968\n",
      "sparsity check 0.09999752044677734\n",
      "layer4.0.conv3 90.66539001464844 120.89808654785156 245.60366821289062\n",
      "nz 52428 157287 torch.Size([1024, 2048])\n",
      "err_prefin 61.01219177246094 0.6412832736968994 0.7329512238502502 0.7966853976249695\n",
      "sparsity check 0.09999895095825195\n",
      "layer4.0.downsample.0 61.01219177246094 98.3612060546875 207.0251007080078\n",
      "nz 13107 91750 torch.Size([512, 2048])\n",
      "err_prefin 84.73329162597656 0.700333833694458 0.7615525722503662 0.7846521735191345\n",
      "sparsity check 0.09999752044677734\n",
      "layer4.1.conv1 84.73329162597656 116.5870361328125 228.15423583984375\n",
      "nz 13107 222822 torch.Size([512, 4608])\n",
      "err_prefin 139.35755920410156 0.22574764490127563 0.8669556379318237 0.23027899861335754\n",
      "sparsity check 0.09999889797634548\n",
      "layer4.1.conv2 139.35757446289062 193.06495666503906 358.32769775390625\n",
      "nz 13107 91750 torch.Size([512, 2048])\n",
      "err_prefin 85.66370391845703 0.24268335103988647 0.4501360356807709 0.6561303734779358\n",
      "sparsity check 0.09999752044677734\n",
      "layer4.1.conv3 85.66371154785156 116.59471130371094 234.42181396484375\n",
      "nz 13107 91750 torch.Size([512, 2048])\n",
      "err_prefin 134.7242431640625 0.4540838599205017 0.8663203716278076 0.4802909195423126\n",
      "sparsity check 0.09999752044677734\n",
      "layer4.2.conv1 134.7242431640625 179.3154296875 345.0306396484375\n",
      "nz 13107 222822 torch.Size([512, 4608])\n",
      "err_prefin 92.75001525878906 0.14155906438827515 0.668501615524292 0.176071897149086\n",
      "sparsity check 0.09999889797634548\n",
      "layer4.2.conv2 92.75001525878906 156.95947265625 288.8470764160156\n",
      "nz 13107 91750 torch.Size([512, 2048])\n",
      "err_prefin 69.09007263183594 0.279774934053421 0.3839866816997528 0.6194970011711121\n",
      "sparsity check 0.09999752044677734\n",
      "layer4.2.conv3 69.09007263183594 96.81941986083984 210.6678466796875\n",
      "change layer1.0.conv1\n",
      "change layer1.0.conv2\n",
      "change layer1.0.conv3\n",
      "change layer1.0.downsample.0\n",
      "change layer1.1.conv1\n",
      "change layer1.1.conv2\n",
      "change layer1.1.conv3\n",
      "change layer1.2.conv1\n",
      "change layer1.2.conv2\n",
      "change layer1.2.conv3\n",
      "change layer2.0.conv1\n",
      "change layer2.0.conv2\n",
      "change layer2.0.conv3\n",
      "change layer2.0.downsample.0\n",
      "change layer2.1.conv1\n",
      "change layer2.1.conv2\n",
      "change layer2.1.conv3\n",
      "change layer2.2.conv1\n",
      "change layer2.2.conv2\n",
      "change layer2.2.conv3\n",
      "change layer2.3.conv1\n",
      "change layer2.3.conv2\n",
      "change layer2.3.conv3\n",
      "change layer3.0.conv1\n",
      "change layer3.0.conv2\n",
      "change layer3.0.conv3\n",
      "change layer3.0.downsample.0\n",
      "change layer3.1.conv1\n",
      "change layer3.1.conv2\n",
      "change layer3.1.conv3\n",
      "change layer3.2.conv1\n",
      "change layer3.2.conv2\n",
      "change layer3.2.conv3\n",
      "change layer3.3.conv1\n",
      "change layer3.3.conv2\n",
      "change layer3.3.conv3\n",
      "change layer3.4.conv1\n",
      "change layer3.4.conv2\n",
      "change layer3.4.conv3\n",
      "change layer3.5.conv1\n",
      "change layer3.5.conv2\n",
      "change layer3.5.conv3\n",
      "change layer4.0.conv1\n",
      "change layer4.0.conv2\n",
      "change layer4.0.conv3\n",
      "change layer4.0.downsample.0\n",
      "change layer4.1.conv1\n",
      "change layer4.1.conv2\n",
      "change layer4.1.conv3\n",
      "change layer4.2.conv1\n",
      "change layer4.2.conv2\n",
      "change layer4.2.conv3\n",
      "torch.Size([64, 64, 1, 1]) torch.Size([64, 64]) torch.Size([64, 64])\n",
      "torch.Size([64, 64, 3, 3]) torch.Size([64, 64]) torch.Size([64, 576])\n",
      "torch.Size([256, 64, 1, 1]) torch.Size([256, 64]) torch.Size([64, 64])\n",
      "torch.Size([64, 256, 1, 1]) torch.Size([64, 64]) torch.Size([64, 256])\n",
      "torch.Size([64, 64, 3, 3]) torch.Size([64, 64]) torch.Size([64, 576])\n",
      "torch.Size([64, 256, 1, 1]) torch.Size([64, 64]) torch.Size([64, 256])\n",
      "torch.Size([64, 64, 3, 3]) torch.Size([64, 64]) torch.Size([64, 576])\n",
      "torch.Size([128, 256, 1, 1]) torch.Size([128, 128]) torch.Size([128, 256])\n",
      "torch.Size([128, 128, 3, 3]) torch.Size([128, 128]) torch.Size([128, 1152])\n",
      "torch.Size([512, 256, 1, 1]) torch.Size([512, 256]) torch.Size([256, 256])\n",
      "torch.Size([128, 512, 1, 1]) torch.Size([128, 128]) torch.Size([128, 512])\n",
      "torch.Size([128, 128, 3, 3]) torch.Size([128, 128]) torch.Size([128, 1152])\n",
      "torch.Size([128, 512, 1, 1]) torch.Size([128, 128]) torch.Size([128, 512])\n",
      "torch.Size([128, 128, 3, 3]) torch.Size([128, 128]) torch.Size([128, 1152])\n",
      "torch.Size([128, 512, 1, 1]) torch.Size([128, 128]) torch.Size([128, 512])\n",
      "torch.Size([128, 128, 3, 3]) torch.Size([128, 128]) torch.Size([128, 1152])\n",
      "torch.Size([256, 512, 1, 1]) torch.Size([256, 256]) torch.Size([256, 512])\n",
      "torch.Size([256, 256, 3, 3]) torch.Size([256, 256]) torch.Size([256, 2304])\n",
      "torch.Size([1024, 512, 1, 1]) torch.Size([1024, 512]) torch.Size([512, 512])\n",
      "torch.Size([256, 1024, 1, 1]) torch.Size([256, 256]) torch.Size([256, 1024])\n",
      "torch.Size([256, 256, 3, 3]) torch.Size([256, 256]) torch.Size([256, 2304])\n",
      "torch.Size([256, 1024, 1, 1]) torch.Size([256, 256]) torch.Size([256, 1024])\n",
      "torch.Size([256, 256, 3, 3]) torch.Size([256, 256]) torch.Size([256, 2304])\n",
      "torch.Size([256, 1024, 1, 1]) torch.Size([256, 256]) torch.Size([256, 1024])\n",
      "torch.Size([256, 256, 3, 3]) torch.Size([256, 256]) torch.Size([256, 2304])\n",
      "torch.Size([256, 1024, 1, 1]) torch.Size([256, 256]) torch.Size([256, 1024])\n",
      "torch.Size([256, 256, 3, 3]) torch.Size([256, 256]) torch.Size([256, 2304])\n",
      "torch.Size([256, 1024, 1, 1]) torch.Size([256, 256]) torch.Size([256, 1024])\n",
      "torch.Size([256, 256, 3, 3]) torch.Size([256, 256]) torch.Size([256, 2304])\n",
      "torch.Size([512, 1024, 1, 1]) torch.Size([512, 512]) torch.Size([512, 1024])\n",
      "torch.Size([512, 512, 3, 3]) torch.Size([512, 512]) torch.Size([512, 4608])\n",
      "torch.Size([2048, 1024, 1, 1]) torch.Size([2048, 1024]) torch.Size([1024, 1024])\n",
      "torch.Size([512, 2048, 1, 1]) torch.Size([512, 512]) torch.Size([512, 2048])\n",
      "torch.Size([512, 512, 3, 3]) torch.Size([512, 512]) torch.Size([512, 4608])\n",
      "torch.Size([512, 2048, 1, 1]) torch.Size([512, 512]) torch.Size([512, 2048])\n",
      "torch.Size([512, 512, 3, 3]) torch.Size([512, 512]) torch.Size([512, 4608])\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "ResNet(\n",
      "  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
      "  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "  (relu): ReLU(inplace=True)\n",
      "  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
      "  (layer1): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Sequential(\n",
      "          (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "          (1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        )\n",
      "        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "      (conv1b): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (sb): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "  )\n",
      "  (layer2): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Sequential(\n",
      "          (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "          (1): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        )\n",
      "        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "      (conv1b): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (sb): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (3): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "  )\n",
      "  (layer3): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Sequential(\n",
      "          (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "          (1): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        )\n",
      "        (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "      (conv1b): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (sb): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (3): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (4): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (5): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "  )\n",
      "  (layer4): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Sequential(\n",
      "          (0): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "          (1): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        )\n",
      "        (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "      (conv1b): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (sb): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "  )\n",
      "  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))\n",
      "  (fc): Linear(in_features=2048, out_features=1000, bias=True)\n",
      ")\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test: [  0/196]\tTime  7.280 ( 7.280)\tLoss 4.9074e+00 (4.9074e+00)\tAcc@1  10.94 ( 10.94)\tAcc@5  31.25 ( 31.25)\n",
      "Test: [ 50/196]\tTime  0.431 ( 0.564)\tLoss 7.6985e+00 (6.6013e+00)\tAcc@1   0.00 (  1.72)\tAcc@5   0.00 (  5.78)\n",
      "Test: [100/196]\tTime  0.432 ( 0.498)\tLoss 6.8495e+00 (6.4894e+00)\tAcc@1   1.95 (  1.82)\tAcc@5   6.25 (  5.86)\n",
      "Test: [150/196]\tTime  0.432 ( 0.476)\tLoss 6.4366e+00 (6.4240e+00)\tAcc@1   2.34 (  2.04)\tAcc@5   3.52 (  6.67)\n",
      "start acc no bn 2.2719998359680176\n",
      "Epoch: [-1][   0/5005]\tTime  3.368 ( 3.368)\tData  2.435 ( 2.435)\tLoss 1.9493e+00 (1.9493e+00)\n",
      "Epoch: [-1][  50/5005]\tTime  0.637 ( 0.691)\tData  0.000 ( 0.048)\tLoss 2.1008e+00 (2.0358e+00)\n",
      "Test: [  0/196]\tTime  3.409 ( 3.409)\tLoss 1.6305e+00 (1.6305e+00)\tAcc@1  62.89 ( 62.89)\tAcc@5  86.72 ( 86.72)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.491)\tLoss 1.2554e+00 (1.6968e+00)\tAcc@1  74.61 ( 62.20)\tAcc@5  92.58 ( 86.47)\n",
      "Test: [100/196]\tTime  0.433 ( 0.462)\tLoss 2.4508e+00 (1.8642e+00)\tAcc@1  42.58 ( 59.11)\tAcc@5  73.44 ( 83.73)\n",
      "Test: [150/196]\tTime  0.433 ( 0.452)\tLoss 2.0563e+00 (1.9977e+00)\tAcc@1  60.94 ( 56.50)\tAcc@5  80.47 ( 81.08)\n",
      "start acc bn 55.76599884033203 2344421\n",
      "Epoch: [0][   0/5005]\tTime  2.991 ( 2.991)\tData  2.345 ( 2.345)\tLoss 2.0098e+00 (2.0098e+00)\n",
      "Epoch: [0][  50/5005]\tTime  0.638 ( 0.684)\tData  0.000 ( 0.046)\tLoss 1.3204e+00 (1.5791e+00)\n",
      "Epoch: [0][ 100/5005]\tTime  0.637 ( 0.661)\tData  0.000 ( 0.023)\tLoss 1.5290e+00 (1.4881e+00)\n",
      "Epoch: [0][ 150/5005]\tTime  0.637 ( 0.653)\tData  0.000 ( 0.016)\tLoss 1.1678e+00 (1.4546e+00)\n",
      "Epoch: [0][ 200/5005]\tTime  0.637 ( 0.649)\tData  0.000 ( 0.012)\tLoss 1.4091e+00 (1.4270e+00)\n",
      "Epoch: [0][ 250/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.010)\tLoss 1.3772e+00 (1.4105e+00)\n",
      "Epoch: [0][ 300/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 1.2990e+00 (1.3924e+00)\n",
      "Epoch: [0][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 1.1018e+00 (1.3833e+00)\n",
      "Epoch: [0][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 1.3158e+00 (1.3710e+00)\n",
      "Epoch: [0][ 450/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.2020e+00 (1.3616e+00)\n",
      "Epoch: [0][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.5342e+00 (1.3524e+00)\n",
      "Epoch: [0][ 550/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.2653e+00 (1.3473e+00)\n",
      "Epoch: [0][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.5306e+00 (1.3406e+00)\n",
      "Epoch: [0][ 650/5005]\tTime  0.636 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.2761e+00 (1.3360e+00)\n",
      "Epoch: [0][ 700/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.004)\tLoss 1.1945e+00 (1.3305e+00)\n",
      "Epoch: [0][ 750/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.3536e+00 (1.3252e+00)\n",
      "Epoch: [0][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.5012e+00 (1.3223e+00)\n",
      "Epoch: [0][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1339e+00 (1.3167e+00)\n",
      "Epoch: [0][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.3556e+00 (1.3117e+00)\n",
      "Epoch: [0][ 950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.003)\tLoss 1.3424e+00 (1.3067e+00)\n",
      "Epoch: [0][1000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.003)\tLoss 1.4113e+00 (1.3026e+00)\n",
      "Epoch: [0][1050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.2556e+00 (1.2996e+00)\n",
      "Epoch: [0][1100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1205e+00 (1.2952e+00)\n",
      "Epoch: [0][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.2533e+00 (1.2915e+00)\n",
      "Epoch: [0][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.4686e+00 (1.2886e+00)\n",
      "Epoch: [0][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1426e+00 (1.2856e+00)\n",
      "Epoch: [0][1300/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.2857e+00 (1.2834e+00)\n",
      "Epoch: [0][1350/5005]\tTime  0.640 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.2712e+00 (1.2802e+00)\n",
      "Epoch: [0][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.3672e+00 (1.2782e+00)\n",
      "Epoch: [0][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.3240e+00 (1.2748e+00)\n",
      "Epoch: [0][1500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.002)\tLoss 1.3690e+00 (1.2720e+00)\n",
      "Epoch: [0][1550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 1.1798e+00 (1.2694e+00)\n",
      "Epoch: [0][1600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 1.1455e+00 (1.2672e+00)\n",
      "Epoch: [0][1650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 1.2790e+00 (1.2652e+00)\n",
      "Epoch: [0][1700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 1.1115e+00 (1.2631e+00)\n",
      "Epoch: [0][1750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 1.3227e+00 (1.2609e+00)\n",
      "Epoch: [0][1800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2906e+00 (1.2578e+00)\n",
      "Epoch: [0][1850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1783e+00 (1.2558e+00)\n",
      "Epoch: [0][1900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2427e+00 (1.2544e+00)\n",
      "Epoch: [0][1950/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2432e+00 (1.2526e+00)\n",
      "Epoch: [0][2000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1476e+00 (1.2507e+00)\n",
      "Epoch: [0][2050/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0594e+00 (1.2491e+00)\n",
      "Epoch: [0][2100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2623e+00 (1.2470e+00)\n",
      "Epoch: [0][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1407e+00 (1.2453e+00)\n",
      "Epoch: [0][2200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0804e+00 (1.2445e+00)\n",
      "Epoch: [0][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2077e+00 (1.2429e+00)\n",
      "Epoch: [0][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2014e+00 (1.2415e+00)\n",
      "Epoch: [0][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1802e+00 (1.2398e+00)\n",
      "Epoch: [0][2400/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1999e+00 (1.2382e+00)\n",
      "Epoch: [0][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2120e+00 (1.2370e+00)\n",
      "Epoch: [0][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2370e+00 (1.2354e+00)\n",
      "Epoch: [0][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3984e+00 (1.2342e+00)\n",
      "Epoch: [0][2600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2285e+00 (1.2331e+00)\n",
      "Epoch: [0][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1820e+00 (1.2320e+00)\n",
      "Epoch: [0][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2630e+00 (1.2305e+00)\n",
      "Epoch: [0][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5968e-01 (1.2292e+00)\n",
      "Epoch: [0][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2015e+00 (1.2281e+00)\n",
      "Epoch: [0][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1813e+00 (1.2274e+00)\n",
      "Epoch: [0][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2173e+00 (1.2262e+00)\n",
      "Epoch: [0][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6732e-01 (1.2249e+00)\n",
      "Epoch: [0][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1051e+00 (1.2244e+00)\n",
      "Epoch: [0][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8541e-01 (1.2236e+00)\n",
      "Epoch: [0][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1480e-01 (1.2223e+00)\n",
      "Epoch: [0][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0563e+00 (1.2215e+00)\n",
      "Epoch: [0][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1547e+00 (1.2204e+00)\n",
      "Epoch: [0][3250/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1668e+00 (1.2196e+00)\n",
      "Epoch: [0][3300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2979e+00 (1.2197e+00)\n",
      "Epoch: [0][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3207e+00 (1.2190e+00)\n",
      "Epoch: [0][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5454e-01 (1.2182e+00)\n",
      "Epoch: [0][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.4556e+00 (1.2177e+00)\n",
      "Epoch: [0][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2056e+00 (1.2168e+00)\n",
      "Epoch: [0][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1991e+00 (1.2163e+00)\n",
      "Epoch: [0][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2074e+00 (1.2151e+00)\n",
      "Epoch: [0][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4649e-01 (1.2141e+00)\n",
      "Epoch: [0][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2220e+00 (1.2139e+00)\n",
      "Epoch: [0][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2368e+00 (1.2136e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [0][3800/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1224e+00 (1.2128e+00)\n",
      "Epoch: [0][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2176e+00 (1.2117e+00)\n",
      "Epoch: [0][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2008e+00 (1.2109e+00)\n",
      "Epoch: [0][3950/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.3824e+00 (1.2104e+00)\n",
      "Epoch: [0][4000/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.1442e+00 (1.2096e+00)\n",
      "Epoch: [0][4050/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.1157e+00 (1.2090e+00)\n",
      "Epoch: [0][4100/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.0861e+00 (1.2083e+00)\n",
      "Epoch: [0][4150/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.2223e+00 (1.2074e+00)\n",
      "Epoch: [0][4200/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.1863e+00 (1.2067e+00)\n",
      "Epoch: [0][4250/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.1479e+00 (1.2062e+00)\n",
      "Epoch: [0][4300/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.2278e+00 (1.2059e+00)\n",
      "Epoch: [0][4350/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.2099e+00 (1.2052e+00)\n",
      "Epoch: [0][4400/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.3669e+00 (1.2047e+00)\n",
      "Epoch: [0][4450/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 9.9303e-01 (1.2040e+00)\n",
      "Epoch: [0][4500/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.0705e+00 (1.2035e+00)\n",
      "Epoch: [0][4550/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.2359e+00 (1.2027e+00)\n",
      "Epoch: [0][4600/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.1103e+00 (1.2023e+00)\n",
      "Epoch: [0][4650/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.0950e+00 (1.2015e+00)\n",
      "Epoch: [0][4700/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.1595e+00 (1.2010e+00)\n",
      "Epoch: [0][4750/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 9.9388e-01 (1.2007e+00)\n",
      "Epoch: [0][4800/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.1492e+00 (1.2003e+00)\n",
      "Epoch: [0][4850/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.0277e+00 (1.1999e+00)\n",
      "Epoch: [0][4900/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.1077e+00 (1.1997e+00)\n",
      "Epoch: [0][4950/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.1192e+00 (1.1991e+00)\n",
      "Epoch: [0][5000/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.0879e+00 (1.1986e+00)\n",
      "Test: [  0/196]\tTime  3.447 ( 3.447)\tLoss 7.0196e-01 (7.0196e-01)\tAcc@1  80.86 ( 80.86)\tAcc@5  96.88 ( 96.88)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.492)\tLoss 5.0939e-01 (9.2268e-01)\tAcc@1  89.06 ( 75.43)\tAcc@5  97.66 ( 93.45)\n",
      "Test: [100/196]\tTime  0.434 ( 0.463)\tLoss 1.5611e+00 (1.0387e+00)\tAcc@1  61.33 ( 73.07)\tAcc@5  85.94 ( 92.10)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.3865e+00 (1.1704e+00)\tAcc@1  72.27 ( 70.65)\tAcc@5  85.16 ( 90.12)\n",
      "epoch 0 1.1985184061601661 69.61599731445312 0.0095 2344421 0.09999448081815601\n",
      "Epoch: [1][   0/5005]\tTime  3.119 ( 3.119)\tData  2.479 ( 2.479)\tLoss 1.1225e+00 (1.1225e+00)\n",
      "Epoch: [1][  50/5005]\tTime  0.637 ( 0.686)\tData  0.000 ( 0.049)\tLoss 1.2000e+00 (1.1200e+00)\n",
      "Epoch: [1][ 100/5005]\tTime  0.637 ( 0.662)\tData  0.000 ( 0.025)\tLoss 1.1209e+00 (1.1156e+00)\n",
      "Epoch: [1][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.017)\tLoss 1.0786e+00 (1.1055e+00)\n",
      "Epoch: [1][ 200/5005]\tTime  0.639 ( 0.650)\tData  0.000 ( 0.013)\tLoss 9.9168e-01 (1.1067e+00)\n",
      "Epoch: [1][ 250/5005]\tTime  0.638 ( 0.647)\tData  0.000 ( 0.010)\tLoss 9.9000e-01 (1.1101e+00)\n",
      "Epoch: [1][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.008)\tLoss 1.1646e+00 (1.1092e+00)\n",
      "Epoch: [1][ 350/5005]\tTime  0.638 ( 0.645)\tData  0.000 ( 0.007)\tLoss 1.0308e+00 (1.1073e+00)\n",
      "Epoch: [1][ 400/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.006)\tLoss 1.0045e+00 (1.1088e+00)\n",
      "Epoch: [1][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 1.1606e+00 (1.1094e+00)\n",
      "Epoch: [1][ 500/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.2345e+00 (1.1108e+00)\n",
      "Epoch: [1][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.1667e+00 (1.1120e+00)\n",
      "Epoch: [1][ 600/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.004)\tLoss 9.9062e-01 (1.1112e+00)\n",
      "Epoch: [1][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.3082e+00 (1.1107e+00)\n",
      "Epoch: [1][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.1093e+00 (1.1097e+00)\n",
      "Epoch: [1][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.1664e-01 (1.1093e+00)\n",
      "Epoch: [1][ 800/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 1.1440e+00 (1.1082e+00)\n",
      "Epoch: [1][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.2334e+00 (1.1092e+00)\n",
      "Epoch: [1][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1128e+00 (1.1106e+00)\n",
      "Epoch: [1][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1627e+00 (1.1100e+00)\n",
      "Epoch: [1][1000/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1568e+00 (1.1086e+00)\n",
      "Epoch: [1][1050/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.2229e+00 (1.1078e+00)\n",
      "Epoch: [1][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.2210e-01 (1.1073e+00)\n",
      "Epoch: [1][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.1674e+00 (1.1068e+00)\n",
      "Epoch: [1][1200/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.1045e+00 (1.1060e+00)\n",
      "Epoch: [1][1250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0252e+00 (1.1058e+00)\n",
      "Epoch: [1][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5753e-01 (1.1060e+00)\n",
      "Epoch: [1][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1703e+00 (1.1061e+00)\n",
      "Epoch: [1][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1302e+00 (1.1059e+00)\n",
      "Epoch: [1][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0049e+00 (1.1057e+00)\n",
      "Epoch: [1][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0839e+00 (1.1055e+00)\n",
      "Epoch: [1][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1526e+00 (1.1058e+00)\n",
      "Epoch: [1][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0296e+00 (1.1068e+00)\n",
      "Epoch: [1][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0460e+00 (1.1064e+00)\n",
      "Epoch: [1][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.2102e+00 (1.1069e+00)\n",
      "Epoch: [1][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5311e-01 (1.1068e+00)\n",
      "Epoch: [1][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0146e+00 (1.1069e+00)\n",
      "Epoch: [1][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1856e+00 (1.1065e+00)\n",
      "Epoch: [1][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0752e+00 (1.1062e+00)\n",
      "Epoch: [1][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0014e+00 (1.1064e+00)\n",
      "Epoch: [1][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1215e+00 (1.1067e+00)\n",
      "Epoch: [1][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.3554e+00 (1.1072e+00)\n",
      "Epoch: [1][2100/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.2298e+00 (1.1078e+00)\n",
      "Epoch: [1][2150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2984e+00 (1.1084e+00)\n",
      "Epoch: [1][2200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1976e+00 (1.1086e+00)\n",
      "Epoch: [1][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7207e-01 (1.1083e+00)\n",
      "Epoch: [1][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0223e+00 (1.1079e+00)\n",
      "Epoch: [1][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0689e+00 (1.1083e+00)\n",
      "Epoch: [1][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0377e+00 (1.1090e+00)\n",
      "Epoch: [1][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0425e+00 (1.1092e+00)\n",
      "Epoch: [1][2500/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2992e-01 (1.1095e+00)\n",
      "Epoch: [1][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2194e+00 (1.1098e+00)\n",
      "Epoch: [1][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0292e+00 (1.1095e+00)\n",
      "Epoch: [1][2650/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1164e+00 (1.1095e+00)\n",
      "Epoch: [1][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0654e+00 (1.1094e+00)\n",
      "Epoch: [1][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2385e+00 (1.1092e+00)\n",
      "Epoch: [1][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0050e+00 (1.1093e+00)\n",
      "Epoch: [1][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1891e+00 (1.1094e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [1][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0831e+00 (1.1090e+00)\n",
      "Epoch: [1][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2613e+00 (1.1093e+00)\n",
      "Epoch: [1][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0088e+00 (1.1090e+00)\n",
      "Epoch: [1][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1010e+00 (1.1092e+00)\n",
      "Epoch: [1][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1196e+00 (1.1093e+00)\n",
      "Epoch: [1][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1383e+00 (1.1095e+00)\n",
      "Epoch: [1][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2880e+00 (1.1098e+00)\n",
      "Epoch: [1][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0990e+00 (1.1094e+00)\n",
      "Epoch: [1][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1370e+00 (1.1090e+00)\n",
      "Epoch: [1][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9987e-01 (1.1090e+00)\n",
      "Epoch: [1][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0621e+00 (1.1088e+00)\n",
      "Epoch: [1][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2331e+00 (1.1088e+00)\n",
      "Epoch: [1][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3625e+00 (1.1089e+00)\n",
      "Epoch: [1][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1456e+00 (1.1089e+00)\n",
      "Epoch: [1][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0231e+00 (1.1086e+00)\n",
      "Epoch: [1][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2176e+00 (1.1089e+00)\n",
      "Epoch: [1][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1010e+00 (1.1091e+00)\n",
      "Epoch: [1][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2052e+00 (1.1090e+00)\n",
      "Epoch: [1][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3586e+00 (1.1092e+00)\n",
      "Epoch: [1][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1894e+00 (1.1092e+00)\n",
      "Epoch: [1][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1987e+00 (1.1096e+00)\n",
      "Epoch: [1][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8469e-01 (1.1097e+00)\n",
      "Epoch: [1][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2969e+00 (1.1095e+00)\n",
      "Epoch: [1][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0927e+00 (1.1098e+00)\n",
      "Epoch: [1][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0618e+00 (1.1099e+00)\n",
      "Epoch: [1][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0999e+00 (1.1099e+00)\n",
      "Epoch: [1][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9276e-01 (1.1095e+00)\n",
      "Epoch: [1][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6335e-01 (1.1094e+00)\n",
      "Epoch: [1][4300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2667e+00 (1.1093e+00)\n",
      "Epoch: [1][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5785e-01 (1.1091e+00)\n",
      "Epoch: [1][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5016e-01 (1.1091e+00)\n",
      "Epoch: [1][4450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7882e-01 (1.1089e+00)\n",
      "Epoch: [1][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0524e+00 (1.1092e+00)\n",
      "Epoch: [1][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0676e+00 (1.1094e+00)\n",
      "Epoch: [1][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0510e+00 (1.1094e+00)\n",
      "Epoch: [1][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2189e+00 (1.1096e+00)\n",
      "Epoch: [1][4700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5765e-01 (1.1096e+00)\n",
      "Epoch: [1][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0863e+00 (1.1095e+00)\n",
      "Epoch: [1][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0982e+00 (1.1098e+00)\n",
      "Epoch: [1][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3617e+00 (1.1098e+00)\n",
      "Epoch: [1][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0136e+00 (1.1097e+00)\n",
      "Epoch: [1][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2017e+00 (1.1098e+00)\n",
      "Epoch: [1][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6359e-01 (1.1098e+00)\n",
      "Test: [  0/196]\tTime  3.461 ( 3.461)\tLoss 6.3248e-01 (6.3248e-01)\tAcc@1  82.42 ( 82.42)\tAcc@5  96.09 ( 96.09)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.492)\tLoss 5.6206e-01 (9.0610e-01)\tAcc@1  83.20 ( 75.97)\tAcc@5  97.27 ( 93.57)\n",
      "Test: [100/196]\tTime  0.434 ( 0.463)\tLoss 1.6446e+00 (1.0336e+00)\tAcc@1  57.42 ( 73.28)\tAcc@5  84.38 ( 92.08)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.1130e+00 (1.1708e+00)\tAcc@1  76.95 ( 70.65)\tAcc@5  87.50 ( 90.16)\n",
      "epoch 1 1.1096708709190135 69.79000091552734 0.009000000000000001 2344421 0.09999448081815601\n",
      "Epoch: [2][   0/5005]\tTime  2.953 ( 2.953)\tData  2.315 ( 2.315)\tLoss 1.0691e+00 (1.0691e+00)\n",
      "Epoch: [2][  50/5005]\tTime  0.637 ( 0.683)\tData  0.000 ( 0.046)\tLoss 1.1392e+00 (1.0434e+00)\n",
      "Epoch: [2][ 100/5005]\tTime  0.638 ( 0.660)\tData  0.000 ( 0.023)\tLoss 1.2377e+00 (1.0490e+00)\n",
      "Epoch: [2][ 150/5005]\tTime  0.637 ( 0.653)\tData  0.000 ( 0.016)\tLoss 8.9018e-01 (1.0518e+00)\n",
      "Epoch: [2][ 200/5005]\tTime  0.637 ( 0.649)\tData  0.000 ( 0.012)\tLoss 9.8866e-01 (1.0621e+00)\n",
      "Epoch: [2][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.009)\tLoss 1.0141e+00 (1.0614e+00)\n",
      "Epoch: [2][ 300/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 1.0958e+00 (1.0624e+00)\n",
      "Epoch: [2][ 350/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.007)\tLoss 1.1064e+00 (1.0626e+00)\n",
      "Epoch: [2][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 9.8926e-01 (1.0629e+00)\n",
      "Epoch: [2][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.005)\tLoss 1.1362e+00 (1.0613e+00)\n",
      "Epoch: [2][ 500/5005]\tTime  0.639 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.2097e+00 (1.0612e+00)\n",
      "Epoch: [2][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 1.0830e+00 (1.0617e+00)\n",
      "Epoch: [2][ 600/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0395e+00 (1.0635e+00)\n",
      "Epoch: [2][ 650/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0225e+00 (1.0631e+00)\n",
      "Epoch: [2][ 700/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0930e+00 (1.0655e+00)\n",
      "Epoch: [2][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 1.1262e+00 (1.0663e+00)\n",
      "Epoch: [2][ 800/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0732e+00 (1.0675e+00)\n",
      "Epoch: [2][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0175e+00 (1.0684e+00)\n",
      "Epoch: [2][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.3199e+00 (1.0691e+00)\n",
      "Epoch: [2][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.5948e-01 (1.0706e+00)\n",
      "Epoch: [2][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1321e+00 (1.0718e+00)\n",
      "Epoch: [2][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.0701e+00 (1.0710e+00)\n",
      "Epoch: [2][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.8911e-01 (1.0701e+00)\n",
      "Epoch: [2][1150/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5346e-01 (1.0707e+00)\n",
      "Epoch: [2][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1239e+00 (1.0716e+00)\n",
      "Epoch: [2][1250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0438e+00 (1.0715e+00)\n",
      "Epoch: [2][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0921e+00 (1.0728e+00)\n",
      "Epoch: [2][1350/5005]\tTime  0.639 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1106e+00 (1.0731e+00)\n",
      "Epoch: [2][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.2617e+00 (1.0728e+00)\n",
      "Epoch: [2][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1715e+00 (1.0728e+00)\n",
      "Epoch: [2][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0596e+00 (1.0717e+00)\n",
      "Epoch: [2][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0746e+00 (1.0722e+00)\n",
      "Epoch: [2][1600/5005]\tTime  0.639 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.2353e+00 (1.0725e+00)\n",
      "Epoch: [2][1650/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0868e+00 (1.0729e+00)\n",
      "Epoch: [2][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.2499e+00 (1.0725e+00)\n",
      "Epoch: [2][1750/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.8488e-01 (1.0726e+00)\n",
      "Epoch: [2][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0971e+00 (1.0725e+00)\n",
      "Epoch: [2][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0992e+00 (1.0723e+00)\n",
      "Epoch: [2][1900/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1979e+00 (1.0725e+00)\n",
      "Epoch: [2][1950/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.1912e-01 (1.0723e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [2][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1498e+00 (1.0724e+00)\n",
      "Epoch: [2][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.9019e-01 (1.0723e+00)\n",
      "Epoch: [2][2100/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0893e+00 (1.0725e+00)\n",
      "Epoch: [2][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.2128e+00 (1.0722e+00)\n",
      "Epoch: [2][2200/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0592e+00 (1.0720e+00)\n",
      "Epoch: [2][2250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0690e+00 (1.0718e+00)\n",
      "Epoch: [2][2300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1008e+00 (1.0723e+00)\n",
      "Epoch: [2][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1424e+00 (1.0724e+00)\n",
      "Epoch: [2][2400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0746e+00 (1.0724e+00)\n",
      "Epoch: [2][2450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1650e+00 (1.0725e+00)\n",
      "Epoch: [2][2500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1515e+00 (1.0722e+00)\n",
      "Epoch: [2][2550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3384e+00 (1.0723e+00)\n",
      "Epoch: [2][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0278e+00 (1.0724e+00)\n",
      "Epoch: [2][2650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0932e+00 (1.0723e+00)\n",
      "Epoch: [2][2700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0562e+00 (1.0722e+00)\n",
      "Epoch: [2][2750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0307e+00 (1.0731e+00)\n",
      "Epoch: [2][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1895e+00 (1.0733e+00)\n",
      "Epoch: [2][2850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2132e+00 (1.0733e+00)\n",
      "Epoch: [2][2900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2285e+00 (1.0736e+00)\n",
      "Epoch: [2][2950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1309e+00 (1.0742e+00)\n",
      "Epoch: [2][3000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8319e-01 (1.0744e+00)\n",
      "Epoch: [2][3050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0327e+00 (1.0746e+00)\n",
      "Epoch: [2][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5700e-01 (1.0746e+00)\n",
      "Epoch: [2][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2074e+00 (1.0748e+00)\n",
      "Epoch: [2][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1556e-01 (1.0746e+00)\n",
      "Epoch: [2][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1342e+00 (1.0746e+00)\n",
      "Epoch: [2][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8536e-01 (1.0747e+00)\n",
      "Epoch: [2][3350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1557e+00 (1.0751e+00)\n",
      "Epoch: [2][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1735e+00 (1.0753e+00)\n",
      "Epoch: [2][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1017e+00 (1.0751e+00)\n",
      "Epoch: [2][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1237e+00 (1.0755e+00)\n",
      "Epoch: [2][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6094e-01 (1.0759e+00)\n",
      "Epoch: [2][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0733e+00 (1.0760e+00)\n",
      "Epoch: [2][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0004e+00 (1.0761e+00)\n",
      "Epoch: [2][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2900e-01 (1.0764e+00)\n",
      "Epoch: [2][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0893e+00 (1.0766e+00)\n",
      "Epoch: [2][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1108e+00 (1.0768e+00)\n",
      "Epoch: [2][3850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1178e+00 (1.0769e+00)\n",
      "Epoch: [2][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1337e+00 (1.0770e+00)\n",
      "Epoch: [2][3950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0795e+00 (1.0770e+00)\n",
      "Epoch: [2][4000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0725e+00 (1.0770e+00)\n",
      "Epoch: [2][4050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2998e+00 (1.0774e+00)\n",
      "Epoch: [2][4100/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0545e+00 (1.0773e+00)\n",
      "Epoch: [2][4150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0733e+00 (1.0775e+00)\n",
      "Epoch: [2][4200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0756e+00 (1.0777e+00)\n",
      "Epoch: [2][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1800e+00 (1.0778e+00)\n",
      "Epoch: [2][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9330e-01 (1.0777e+00)\n",
      "Epoch: [2][4350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1517e+00 (1.0779e+00)\n",
      "Epoch: [2][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0694e+00 (1.0781e+00)\n",
      "Epoch: [2][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1572e+00 (1.0782e+00)\n",
      "Epoch: [2][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0224e+00 (1.0785e+00)\n",
      "Epoch: [2][4550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0098e+00 (1.0788e+00)\n",
      "Epoch: [2][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1943e+00 (1.0789e+00)\n",
      "Epoch: [2][4650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3264e+00 (1.0794e+00)\n",
      "Epoch: [2][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1857e+00 (1.0793e+00)\n",
      "Epoch: [2][4750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5394e-01 (1.0798e+00)\n",
      "Epoch: [2][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1040e+00 (1.0800e+00)\n",
      "Epoch: [2][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0954e+00 (1.0801e+00)\n",
      "Epoch: [2][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1150e+00 (1.0805e+00)\n",
      "Epoch: [2][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0749e+00 (1.0807e+00)\n",
      "Epoch: [2][5000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1093e+00 (1.0807e+00)\n",
      "Test: [  0/196]\tTime  3.334 ( 3.334)\tLoss 7.0092e-01 (7.0092e-01)\tAcc@1  82.42 ( 82.42)\tAcc@5  95.31 ( 95.31)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.490)\tLoss 7.8046e-01 (9.0505e-01)\tAcc@1  77.73 ( 75.80)\tAcc@5  94.53 ( 93.54)\n",
      "Test: [100/196]\tTime  0.433 ( 0.462)\tLoss 1.5765e+00 (1.0419e+00)\tAcc@1  57.42 ( 72.90)\tAcc@5  85.55 ( 92.07)\n",
      "Test: [150/196]\tTime  0.433 ( 0.453)\tLoss 1.4011e+00 (1.1521e+00)\tAcc@1  71.09 ( 70.88)\tAcc@5  85.94 ( 90.51)\n",
      "epoch 2 1.0807167228360612 69.72200012207031 0.0085 2344421 0.09999448081815601\n",
      "Epoch: [3][   0/5005]\tTime  3.104 ( 3.104)\tData  2.463 ( 2.463)\tLoss 9.9800e-01 (9.9800e-01)\n",
      "Epoch: [3][  50/5005]\tTime  0.637 ( 0.686)\tData  0.000 ( 0.048)\tLoss 1.0302e+00 (1.0568e+00)\n",
      "Epoch: [3][ 100/5005]\tTime  0.638 ( 0.662)\tData  0.000 ( 0.025)\tLoss 1.0107e+00 (1.0513e+00)\n",
      "Epoch: [3][ 150/5005]\tTime  0.638 ( 0.654)\tData  0.000 ( 0.017)\tLoss 1.0075e+00 (1.0360e+00)\n",
      "Epoch: [3][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.012)\tLoss 1.1420e+00 (1.0355e+00)\n",
      "Epoch: [3][ 250/5005]\tTime  0.638 ( 0.647)\tData  0.000 ( 0.010)\tLoss 9.9380e-01 (1.0396e+00)\n",
      "Epoch: [3][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.008)\tLoss 1.0504e+00 (1.0411e+00)\n",
      "Epoch: [3][ 350/5005]\tTime  0.638 ( 0.645)\tData  0.000 ( 0.007)\tLoss 1.0807e+00 (1.0422e+00)\n",
      "Epoch: [3][ 400/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.006)\tLoss 1.0568e+00 (1.0442e+00)\n",
      "Epoch: [3][ 450/5005]\tTime  0.639 ( 0.643)\tData  0.000 ( 0.006)\tLoss 9.3284e-01 (1.0414e+00)\n",
      "Epoch: [3][ 500/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.005)\tLoss 1.0745e+00 (1.0433e+00)\n",
      "Epoch: [3][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.2125e-01 (1.0420e+00)\n",
      "Epoch: [3][ 600/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 1.1487e+00 (1.0422e+00)\n",
      "Epoch: [3][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.1837e+00 (1.0432e+00)\n",
      "Epoch: [3][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0995e+00 (1.0450e+00)\n",
      "Epoch: [3][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 1.0146e+00 (1.0460e+00)\n",
      "Epoch: [3][ 800/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.003)\tLoss 1.1808e+00 (1.0458e+00)\n",
      "Epoch: [3][ 850/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0081e+00 (1.0461e+00)\n",
      "Epoch: [3][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0911e+00 (1.0472e+00)\n",
      "Epoch: [3][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0664e+00 (1.0463e+00)\n",
      "Epoch: [3][1000/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0987e+00 (1.0461e+00)\n",
      "Epoch: [3][1050/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1692e+00 (1.0468e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [3][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.0415e+00 (1.0471e+00)\n",
      "Epoch: [3][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.0560e+00 (1.0476e+00)\n",
      "Epoch: [3][1200/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.0679e+00 (1.0473e+00)\n",
      "Epoch: [3][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0458e+00 (1.0490e+00)\n",
      "Epoch: [3][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6608e-01 (1.0494e+00)\n",
      "Epoch: [3][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0949e+00 (1.0497e+00)\n",
      "Epoch: [3][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7061e-01 (1.0499e+00)\n",
      "Epoch: [3][1450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0023e+00 (1.0509e+00)\n",
      "Epoch: [3][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.7926e-01 (1.0519e+00)\n",
      "Epoch: [3][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6315e-01 (1.0519e+00)\n",
      "Epoch: [3][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0579e+00 (1.0519e+00)\n",
      "Epoch: [3][1650/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1057e+00 (1.0523e+00)\n",
      "Epoch: [3][1700/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.8235e-01 (1.0524e+00)\n",
      "Epoch: [3][1750/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0475e+00 (1.0531e+00)\n",
      "Epoch: [3][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0233e+00 (1.0526e+00)\n",
      "Epoch: [3][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1029e+00 (1.0521e+00)\n",
      "Epoch: [3][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0440e+00 (1.0514e+00)\n",
      "Epoch: [3][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.6310e-01 (1.0515e+00)\n",
      "Epoch: [3][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.2058e+00 (1.0523e+00)\n",
      "Epoch: [3][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1374e+00 (1.0532e+00)\n",
      "Epoch: [3][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.5660e-01 (1.0532e+00)\n",
      "Epoch: [3][2150/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.9810e-01 (1.0533e+00)\n",
      "Epoch: [3][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.2914e+00 (1.0533e+00)\n",
      "Epoch: [3][2250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.5802e-01 (1.0534e+00)\n",
      "Epoch: [3][2300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0288e+00 (1.0539e+00)\n",
      "Epoch: [3][2350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1694e+00 (1.0542e+00)\n",
      "Epoch: [3][2400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1098e+00 (1.0548e+00)\n",
      "Epoch: [3][2450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1184e+00 (1.0558e+00)\n",
      "Epoch: [3][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0826e+00 (1.0567e+00)\n",
      "Epoch: [3][2550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6408e-01 (1.0567e+00)\n",
      "Epoch: [3][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7595e-01 (1.0566e+00)\n",
      "Epoch: [3][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0462e+00 (1.0568e+00)\n",
      "Epoch: [3][2700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0077e+00 (1.0572e+00)\n",
      "Epoch: [3][2750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0184e+00 (1.0575e+00)\n",
      "Epoch: [3][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0135e+00 (1.0574e+00)\n",
      "Epoch: [3][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0832e+00 (1.0572e+00)\n",
      "Epoch: [3][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0167e+00 (1.0571e+00)\n",
      "Epoch: [3][2950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1146e+00 (1.0574e+00)\n",
      "Epoch: [3][3000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6927e-01 (1.0578e+00)\n",
      "Epoch: [3][3050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0453e+00 (1.0579e+00)\n",
      "Epoch: [3][3100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0997e+00 (1.0584e+00)\n",
      "Epoch: [3][3150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1395e+00 (1.0590e+00)\n",
      "Epoch: [3][3200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0839e+00 (1.0591e+00)\n",
      "Epoch: [3][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0520e+00 (1.0594e+00)\n",
      "Epoch: [3][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0200e+00 (1.0596e+00)\n",
      "Epoch: [3][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1912e+00 (1.0591e+00)\n",
      "Epoch: [3][3400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0716e+00 (1.0591e+00)\n",
      "Epoch: [3][3450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0376e+00 (1.0592e+00)\n",
      "Epoch: [3][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1820e+00 (1.0591e+00)\n",
      "Epoch: [3][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1382e+00 (1.0594e+00)\n",
      "Epoch: [3][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0101e+00 (1.0591e+00)\n",
      "Epoch: [3][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0022e+00 (1.0590e+00)\n",
      "Epoch: [3][3700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2568e+00 (1.0597e+00)\n",
      "Epoch: [3][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1128e+00 (1.0598e+00)\n",
      "Epoch: [3][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0719e+00 (1.0598e+00)\n",
      "Epoch: [3][3850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2460e+00 (1.0598e+00)\n",
      "Epoch: [3][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8086e-01 (1.0599e+00)\n",
      "Epoch: [3][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0318e+00 (1.0597e+00)\n",
      "Epoch: [3][4000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2532e+00 (1.0600e+00)\n",
      "Epoch: [3][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1422e+00 (1.0603e+00)\n",
      "Epoch: [3][4100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2559e+00 (1.0604e+00)\n",
      "Epoch: [3][4150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9875e-01 (1.0607e+00)\n",
      "Epoch: [3][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0369e+00 (1.0608e+00)\n",
      "Epoch: [3][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0535e+00 (1.0609e+00)\n",
      "Epoch: [3][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2129e+00 (1.0608e+00)\n",
      "Epoch: [3][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3139e+00 (1.0611e+00)\n",
      "Epoch: [3][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8994e-01 (1.0612e+00)\n",
      "Epoch: [3][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1252e+00 (1.0611e+00)\n",
      "Epoch: [3][4500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0093e+00 (1.0609e+00)\n",
      "Epoch: [3][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0735e+00 (1.0612e+00)\n",
      "Epoch: [3][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1641e+00 (1.0612e+00)\n",
      "Epoch: [3][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1011e+00 (1.0614e+00)\n",
      "Epoch: [3][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1135e+00 (1.0613e+00)\n",
      "Epoch: [3][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0791e+00 (1.0613e+00)\n",
      "Epoch: [3][4800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6645e-01 (1.0617e+00)\n",
      "Epoch: [3][4850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0713e+00 (1.0619e+00)\n",
      "Epoch: [3][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2119e+00 (1.0621e+00)\n",
      "Epoch: [3][4950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0364e+00 (1.0625e+00)\n",
      "Epoch: [3][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1453e+00 (1.0627e+00)\n",
      "Test: [  0/196]\tTime  3.378 ( 3.378)\tLoss 6.9237e-01 (6.9237e-01)\tAcc@1  81.25 ( 81.25)\tAcc@5  95.70 ( 95.70)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.491)\tLoss 5.5013e-01 (8.5798e-01)\tAcc@1  85.55 ( 76.76)\tAcc@5  96.09 ( 94.16)\n",
      "Test: [100/196]\tTime  0.434 ( 0.462)\tLoss 1.6280e+00 (9.9160e-01)\tAcc@1  56.25 ( 74.01)\tAcc@5  85.55 ( 92.64)\n",
      "Test: [150/196]\tTime  0.434 ( 0.453)\tLoss 1.2043e+00 (1.1199e+00)\tAcc@1  74.61 ( 71.56)\tAcc@5  88.67 ( 90.86)\n",
      "epoch 3 1.0627262323601225 70.57799530029297 0.008000000000000002 2344421 0.09999448081815601\n",
      "Epoch: [4][   0/5005]\tTime  2.954 ( 2.954)\tData  2.315 ( 2.315)\tLoss 1.0476e+00 (1.0476e+00)\n",
      "Epoch: [4][  50/5005]\tTime  0.637 ( 0.683)\tData  0.000 ( 0.046)\tLoss 9.5874e-01 (1.0214e+00)\n",
      "Epoch: [4][ 100/5005]\tTime  0.637 ( 0.661)\tData  0.000 ( 0.023)\tLoss 1.1282e+00 (1.0191e+00)\n",
      "Epoch: [4][ 150/5005]\tTime  0.638 ( 0.653)\tData  0.000 ( 0.016)\tLoss 1.1214e+00 (1.0224e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [4][ 200/5005]\tTime  0.638 ( 0.649)\tData  0.000 ( 0.012)\tLoss 9.9392e-01 (1.0221e+00)\n",
      "Epoch: [4][ 250/5005]\tTime  0.638 ( 0.647)\tData  0.000 ( 0.009)\tLoss 9.9891e-01 (1.0284e+00)\n",
      "Epoch: [4][ 300/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 9.4518e-01 (1.0243e+00)\n",
      "Epoch: [4][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 1.1287e+00 (1.0250e+00)\n",
      "Epoch: [4][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 1.0423e+00 (1.0243e+00)\n",
      "Epoch: [4][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.005)\tLoss 1.0418e+00 (1.0240e+00)\n",
      "Epoch: [4][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.6181e-01 (1.0240e+00)\n",
      "Epoch: [4][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.004)\tLoss 1.0351e+00 (1.0247e+00)\n",
      "Epoch: [4][ 600/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0540e+00 (1.0270e+00)\n",
      "Epoch: [4][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.2585e+00 (1.0290e+00)\n",
      "Epoch: [4][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 1.1068e+00 (1.0282e+00)\n",
      "Epoch: [4][ 750/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.003)\tLoss 9.2930e-01 (1.0289e+00)\n",
      "Epoch: [4][ 800/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0313e+00 (1.0298e+00)\n",
      "Epoch: [4][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0062e+00 (1.0315e+00)\n",
      "Epoch: [4][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.8285e-01 (1.0330e+00)\n",
      "Epoch: [4][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.8204e-01 (1.0334e+00)\n",
      "Epoch: [4][1000/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1293e+00 (1.0348e+00)\n",
      "Epoch: [4][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.1716e+00 (1.0349e+00)\n",
      "Epoch: [4][1100/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.6878e-01 (1.0341e+00)\n",
      "Epoch: [4][1150/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.6416e-01 (1.0346e+00)\n",
      "Epoch: [4][1200/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.0885e+00 (1.0345e+00)\n",
      "Epoch: [4][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0034e+00 (1.0347e+00)\n",
      "Epoch: [4][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9364e-01 (1.0356e+00)\n",
      "Epoch: [4][1350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0573e+00 (1.0369e+00)\n",
      "Epoch: [4][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1343e+00 (1.0375e+00)\n",
      "Epoch: [4][1450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1726e+00 (1.0378e+00)\n",
      "Epoch: [4][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4978e-01 (1.0375e+00)\n",
      "Epoch: [4][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1759e+00 (1.0376e+00)\n",
      "Epoch: [4][1600/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0846e+00 (1.0376e+00)\n",
      "Epoch: [4][1650/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1731e+00 (1.0374e+00)\n",
      "Epoch: [4][1700/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1590e+00 (1.0376e+00)\n",
      "Epoch: [4][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0841e+00 (1.0366e+00)\n",
      "Epoch: [4][1800/5005]\tTime  0.639 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.5639e-01 (1.0372e+00)\n",
      "Epoch: [4][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.7591e-01 (1.0378e+00)\n",
      "Epoch: [4][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1286e+00 (1.0371e+00)\n",
      "Epoch: [4][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.3610e-01 (1.0372e+00)\n",
      "Epoch: [4][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.4306e+00 (1.0376e+00)\n",
      "Epoch: [4][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1753e+00 (1.0382e+00)\n",
      "Epoch: [4][2100/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0508e+00 (1.0379e+00)\n",
      "Epoch: [4][2150/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.9173e-01 (1.0385e+00)\n",
      "Epoch: [4][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.2128e+00 (1.0389e+00)\n",
      "Epoch: [4][2250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0635e+00 (1.0388e+00)\n",
      "Epoch: [4][2300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0341e+00 (1.0391e+00)\n",
      "Epoch: [4][2350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.2246e-01 (1.0394e+00)\n",
      "Epoch: [4][2400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5222e-01 (1.0396e+00)\n",
      "Epoch: [4][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2301e+00 (1.0407e+00)\n",
      "Epoch: [4][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6941e-01 (1.0406e+00)\n",
      "Epoch: [4][2550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2874e-01 (1.0404e+00)\n",
      "Epoch: [4][2600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8520e-01 (1.0407e+00)\n",
      "Epoch: [4][2650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7406e-01 (1.0413e+00)\n",
      "Epoch: [4][2700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9812e-01 (1.0412e+00)\n",
      "Epoch: [4][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7853e-01 (1.0411e+00)\n",
      "Epoch: [4][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7216e-01 (1.0412e+00)\n",
      "Epoch: [4][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0432e+00 (1.0415e+00)\n",
      "Epoch: [4][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4688e-01 (1.0413e+00)\n",
      "Epoch: [4][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2046e-01 (1.0416e+00)\n",
      "Epoch: [4][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9439e-01 (1.0415e+00)\n",
      "Epoch: [4][3050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1805e+00 (1.0418e+00)\n",
      "Epoch: [4][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0695e+00 (1.0418e+00)\n",
      "Epoch: [4][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1512e+00 (1.0418e+00)\n",
      "Epoch: [4][3200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2460e-01 (1.0419e+00)\n",
      "Epoch: [4][3250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0437e+00 (1.0420e+00)\n",
      "Epoch: [4][3300/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1907e+00 (1.0423e+00)\n",
      "Epoch: [4][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1354e+00 (1.0420e+00)\n",
      "Epoch: [4][3400/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9565e-01 (1.0418e+00)\n",
      "Epoch: [4][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0694e+00 (1.0421e+00)\n",
      "Epoch: [4][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0164e+00 (1.0420e+00)\n",
      "Epoch: [4][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4811e-01 (1.0422e+00)\n",
      "Epoch: [4][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1819e+00 (1.0424e+00)\n",
      "Epoch: [4][3650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8002e-01 (1.0422e+00)\n",
      "Epoch: [4][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0593e+00 (1.0426e+00)\n",
      "Epoch: [4][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2122e+00 (1.0427e+00)\n",
      "Epoch: [4][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0559e+00 (1.0425e+00)\n",
      "Epoch: [4][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0094e+00 (1.0426e+00)\n",
      "Epoch: [4][3900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0466e+00 (1.0429e+00)\n",
      "Epoch: [4][3950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0938e+00 (1.0431e+00)\n",
      "Epoch: [4][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5766e-01 (1.0435e+00)\n",
      "Epoch: [4][4050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0287e-01 (1.0437e+00)\n",
      "Epoch: [4][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0036e+00 (1.0437e+00)\n",
      "Epoch: [4][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2459e+00 (1.0440e+00)\n",
      "Epoch: [4][4200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0271e+00 (1.0441e+00)\n",
      "Epoch: [4][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1131e+00 (1.0444e+00)\n",
      "Epoch: [4][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7647e-01 (1.0443e+00)\n",
      "Epoch: [4][4350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0767e+00 (1.0445e+00)\n",
      "Epoch: [4][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5460e-01 (1.0448e+00)\n",
      "Epoch: [4][4450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1088e+00 (1.0449e+00)\n",
      "Epoch: [4][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0549e+00 (1.0453e+00)\n",
      "Epoch: [4][4550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0415e+00 (1.0456e+00)\n",
      "Epoch: [4][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0359e+00 (1.0459e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [4][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0631e+00 (1.0462e+00)\n",
      "Epoch: [4][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2646e-01 (1.0463e+00)\n",
      "Epoch: [4][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7134e-01 (1.0465e+00)\n",
      "Epoch: [4][4800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1815e+00 (1.0469e+00)\n",
      "Epoch: [4][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0351e+00 (1.0470e+00)\n",
      "Epoch: [4][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9591e-01 (1.0470e+00)\n",
      "Epoch: [4][4950/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8369e-01 (1.0469e+00)\n",
      "Epoch: [4][5000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2114e+00 (1.0473e+00)\n",
      "Test: [  0/196]\tTime  3.411 ( 3.411)\tLoss 6.9913e-01 (6.9913e-01)\tAcc@1  81.25 ( 81.25)\tAcc@5  96.09 ( 96.09)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.491)\tLoss 6.1318e-01 (8.9559e-01)\tAcc@1  82.42 ( 76.17)\tAcc@5  96.48 ( 93.78)\n",
      "Test: [100/196]\tTime  0.434 ( 0.463)\tLoss 1.4416e+00 (1.0237e+00)\tAcc@1  61.72 ( 73.39)\tAcc@5  85.94 ( 92.18)\n",
      "Test: [150/196]\tTime  0.434 ( 0.453)\tLoss 1.2327e+00 (1.1517e+00)\tAcc@1  75.00 ( 70.93)\tAcc@5  87.11 ( 90.46)\n",
      "epoch 4 1.0473253957219968 69.93999481201172 0.0075000000000000015 2344421 0.09999448081815601\n",
      "Epoch: [5][   0/5005]\tTime  3.073 ( 3.073)\tData  2.434 ( 2.434)\tLoss 9.4396e-01 (9.4396e-01)\n",
      "Epoch: [5][  50/5005]\tTime  0.637 ( 0.685)\tData  0.000 ( 0.048)\tLoss 8.9271e-01 (9.9571e-01)\n",
      "Epoch: [5][ 100/5005]\tTime  0.637 ( 0.661)\tData  0.000 ( 0.024)\tLoss 1.0166e+00 (1.0017e+00)\n",
      "Epoch: [5][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.016)\tLoss 9.6852e-01 (1.0026e+00)\n",
      "Epoch: [5][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.012)\tLoss 1.0658e+00 (1.0044e+00)\n",
      "Epoch: [5][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 9.7918e-01 (1.0086e+00)\n",
      "Epoch: [5][ 300/5005]\tTime  0.638 ( 0.645)\tData  0.000 ( 0.008)\tLoss 1.1985e+00 (1.0104e+00)\n",
      "Epoch: [5][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 8.8035e-01 (1.0127e+00)\n",
      "Epoch: [5][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 1.1063e+00 (1.0138e+00)\n",
      "Epoch: [5][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 1.1773e+00 (1.0110e+00)\n",
      "Epoch: [5][ 500/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.1559e+00 (1.0122e+00)\n",
      "Epoch: [5][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.0480e+00 (1.0146e+00)\n",
      "Epoch: [5][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.4689e-01 (1.0141e+00)\n",
      "Epoch: [5][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.1134e+00 (1.0142e+00)\n",
      "Epoch: [5][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0862e+00 (1.0142e+00)\n",
      "Epoch: [5][ 750/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.003)\tLoss 1.0790e+00 (1.0141e+00)\n",
      "Epoch: [5][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.7530e-01 (1.0160e+00)\n",
      "Epoch: [5][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.7368e-01 (1.0170e+00)\n",
      "Epoch: [5][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.9247e-01 (1.0170e+00)\n",
      "Epoch: [5][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1892e+00 (1.0172e+00)\n",
      "Epoch: [5][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.5431e-01 (1.0188e+00)\n",
      "Epoch: [5][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0549e+00 (1.0198e+00)\n",
      "Epoch: [5][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.2498e-01 (1.0201e+00)\n",
      "Epoch: [5][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0321e+00 (1.0201e+00)\n",
      "Epoch: [5][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1769e+00 (1.0204e+00)\n",
      "Epoch: [5][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0723e+00 (1.0210e+00)\n",
      "Epoch: [5][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1014e-01 (1.0223e+00)\n",
      "Epoch: [5][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5232e-01 (1.0232e+00)\n",
      "Epoch: [5][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4722e-01 (1.0239e+00)\n",
      "Epoch: [5][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0558e+00 (1.0233e+00)\n",
      "Epoch: [5][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0464e+00 (1.0241e+00)\n",
      "Epoch: [5][1550/5005]\tTime  0.639 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6242e-01 (1.0241e+00)\n",
      "Epoch: [5][1600/5005]\tTime  0.640 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6572e-01 (1.0240e+00)\n",
      "Epoch: [5][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0885e+00 (1.0244e+00)\n",
      "Epoch: [5][1700/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9862e-01 (1.0243e+00)\n",
      "Epoch: [5][1750/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0994e+00 (1.0255e+00)\n",
      "Epoch: [5][1800/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1964e+00 (1.0257e+00)\n",
      "Epoch: [5][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5575e-01 (1.0259e+00)\n",
      "Epoch: [5][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1298e+00 (1.0260e+00)\n",
      "Epoch: [5][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0086e+00 (1.0263e+00)\n",
      "Epoch: [5][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.5947e-01 (1.0260e+00)\n",
      "Epoch: [5][2050/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.0444e-01 (1.0261e+00)\n",
      "Epoch: [5][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0449e+00 (1.0266e+00)\n",
      "Epoch: [5][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0626e+00 (1.0267e+00)\n",
      "Epoch: [5][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0129e+00 (1.0269e+00)\n",
      "Epoch: [5][2250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0192e+00 (1.0268e+00)\n",
      "Epoch: [5][2300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0668e+00 (1.0268e+00)\n",
      "Epoch: [5][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0583e+00 (1.0272e+00)\n",
      "Epoch: [5][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0692e+00 (1.0273e+00)\n",
      "Epoch: [5][2450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0746e+00 (1.0270e+00)\n",
      "Epoch: [5][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6617e-01 (1.0267e+00)\n",
      "Epoch: [5][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8562e-01 (1.0274e+00)\n",
      "Epoch: [5][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3023e-01 (1.0272e+00)\n",
      "Epoch: [5][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1289e+00 (1.0272e+00)\n",
      "Epoch: [5][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1536e+00 (1.0274e+00)\n",
      "Epoch: [5][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0266e+00 (1.0273e+00)\n",
      "Epoch: [5][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0716e+00 (1.0276e+00)\n",
      "Epoch: [5][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7999e-01 (1.0276e+00)\n",
      "Epoch: [5][2900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0500e+00 (1.0276e+00)\n",
      "Epoch: [5][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0884e+00 (1.0279e+00)\n",
      "Epoch: [5][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1424e+00 (1.0281e+00)\n",
      "Epoch: [5][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6920e-01 (1.0279e+00)\n",
      "Epoch: [5][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0358e-01 (1.0279e+00)\n",
      "Epoch: [5][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0679e+00 (1.0278e+00)\n",
      "Epoch: [5][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0473e+00 (1.0279e+00)\n",
      "Epoch: [5][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0586e+00 (1.0278e+00)\n",
      "Epoch: [5][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0900e+00 (1.0283e+00)\n",
      "Epoch: [5][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2343e+00 (1.0284e+00)\n",
      "Epoch: [5][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9291e-01 (1.0283e+00)\n",
      "Epoch: [5][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0774e+00 (1.0286e+00)\n",
      "Epoch: [5][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2340e+00 (1.0290e+00)\n",
      "Epoch: [5][3550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6599e-01 (1.0293e+00)\n",
      "Epoch: [5][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1761e+00 (1.0294e+00)\n",
      "Epoch: [5][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1185e+00 (1.0293e+00)\n",
      "Epoch: [5][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1813e+00 (1.0298e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [5][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0139e+00 (1.0297e+00)\n",
      "Epoch: [5][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0952e+00 (1.0299e+00)\n",
      "Epoch: [5][3850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3158e-01 (1.0301e+00)\n",
      "Epoch: [5][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0243e+00 (1.0301e+00)\n",
      "Epoch: [5][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0088e+00 (1.0300e+00)\n",
      "Epoch: [5][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2495e+00 (1.0303e+00)\n",
      "Epoch: [5][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0657e+00 (1.0307e+00)\n",
      "Epoch: [5][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1721e+00 (1.0308e+00)\n",
      "Epoch: [5][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0266e+00 (1.0306e+00)\n",
      "Epoch: [5][4200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0445e+00 (1.0307e+00)\n",
      "Epoch: [5][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1713e-01 (1.0309e+00)\n",
      "Epoch: [5][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3053e+00 (1.0310e+00)\n",
      "Epoch: [5][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8360e-01 (1.0307e+00)\n",
      "Epoch: [5][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3411e+00 (1.0310e+00)\n",
      "Epoch: [5][4450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6197e-01 (1.0310e+00)\n",
      "Epoch: [5][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9378e-01 (1.0307e+00)\n",
      "Epoch: [5][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0548e+00 (1.0310e+00)\n",
      "Epoch: [5][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0857e+00 (1.0311e+00)\n",
      "Epoch: [5][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1875e+00 (1.0311e+00)\n",
      "Epoch: [5][4700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1129e+00 (1.0315e+00)\n",
      "Epoch: [5][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0430e+00 (1.0315e+00)\n",
      "Epoch: [5][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2312e-01 (1.0316e+00)\n",
      "Epoch: [5][4850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0462e+00 (1.0316e+00)\n",
      "Epoch: [5][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4138e-01 (1.0319e+00)\n",
      "Epoch: [5][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1532e+00 (1.0319e+00)\n",
      "Epoch: [5][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6411e-01 (1.0320e+00)\n",
      "Test: [  0/196]\tTime  3.451 ( 3.451)\tLoss 6.7247e-01 (6.7247e-01)\tAcc@1  82.03 ( 82.03)\tAcc@5  96.09 ( 96.09)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.492)\tLoss 6.6616e-01 (8.5465e-01)\tAcc@1  81.25 ( 77.24)\tAcc@5  96.88 ( 93.91)\n",
      "Test: [100/196]\tTime  0.433 ( 0.463)\tLoss 1.3842e+00 (9.7908e-01)\tAcc@1  63.67 ( 74.36)\tAcc@5  87.89 ( 92.60)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.2237e+00 (1.1017e+00)\tAcc@1  74.61 ( 72.06)\tAcc@5  86.72 ( 90.97)\n",
      "epoch 5 1.0319454061366409 71.0459976196289 0.007000000000000001 2344421 0.09999448081815601\n",
      "Epoch: [6][   0/5005]\tTime  3.116 ( 3.116)\tData  2.478 ( 2.478)\tLoss 9.3637e-01 (9.3637e-01)\n",
      "Epoch: [6][  50/5005]\tTime  0.638 ( 0.686)\tData  0.000 ( 0.049)\tLoss 1.2188e+00 (1.0045e+00)\n",
      "Epoch: [6][ 100/5005]\tTime  0.637 ( 0.662)\tData  0.000 ( 0.025)\tLoss 1.1281e+00 (1.0158e+00)\n",
      "Epoch: [6][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.017)\tLoss 9.6343e-01 (1.0092e+00)\n",
      "Epoch: [6][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.013)\tLoss 9.9651e-01 (1.0055e+00)\n",
      "Epoch: [6][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 1.0027e+00 (1.0037e+00)\n",
      "Epoch: [6][ 300/5005]\tTime  0.638 ( 0.646)\tData  0.000 ( 0.008)\tLoss 8.7998e-01 (1.0029e+00)\n",
      "Epoch: [6][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 1.0030e+00 (1.0012e+00)\n",
      "Epoch: [6][ 400/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.006)\tLoss 9.9105e-01 (1.0035e+00)\n",
      "Epoch: [6][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 9.7601e-01 (1.0051e+00)\n",
      "Epoch: [6][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.3296e-01 (1.0037e+00)\n",
      "Epoch: [6][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.3792e-01 (1.0031e+00)\n",
      "Epoch: [6][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0098e+00 (1.0024e+00)\n",
      "Epoch: [6][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.1029e+00 (1.0021e+00)\n",
      "Epoch: [6][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.6337e-01 (1.0037e+00)\n",
      "Epoch: [6][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.4512e-01 (1.0034e+00)\n",
      "Epoch: [6][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1303e+00 (1.0036e+00)\n",
      "Epoch: [6][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0466e+00 (1.0045e+00)\n",
      "Epoch: [6][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.4975e-01 (1.0046e+00)\n",
      "Epoch: [6][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0100e+00 (1.0047e+00)\n",
      "Epoch: [6][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.6366e-01 (1.0055e+00)\n",
      "Epoch: [6][1050/5005]\tTime  0.639 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1019e+00 (1.0071e+00)\n",
      "Epoch: [6][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.7478e-01 (1.0072e+00)\n",
      "Epoch: [6][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.3001e-01 (1.0071e+00)\n",
      "Epoch: [6][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0815e+00 (1.0069e+00)\n",
      "Epoch: [6][1250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4535e-01 (1.0075e+00)\n",
      "Epoch: [6][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7333e-01 (1.0077e+00)\n",
      "Epoch: [6][1350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3183e-01 (1.0081e+00)\n",
      "Epoch: [6][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0981e+00 (1.0082e+00)\n",
      "Epoch: [6][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0243e+00 (1.0085e+00)\n",
      "Epoch: [6][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0568e+00 (1.0093e+00)\n",
      "Epoch: [6][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9671e-01 (1.0104e+00)\n",
      "Epoch: [6][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.7238e-01 (1.0107e+00)\n",
      "Epoch: [6][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3327e-01 (1.0114e+00)\n",
      "Epoch: [6][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9322e-01 (1.0120e+00)\n",
      "Epoch: [6][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9713e-01 (1.0116e+00)\n",
      "Epoch: [6][1800/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9445e-01 (1.0120e+00)\n",
      "Epoch: [6][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2132e-01 (1.0117e+00)\n",
      "Epoch: [6][1900/5005]\tTime  0.641 ( 0.639)\tData  0.001 ( 0.002)\tLoss 1.0280e+00 (1.0115e+00)\n",
      "Epoch: [6][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0058e+00 (1.0122e+00)\n",
      "Epoch: [6][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.9341e-01 (1.0128e+00)\n",
      "Epoch: [6][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0599e+00 (1.0129e+00)\n",
      "Epoch: [6][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.1091e-01 (1.0136e+00)\n",
      "Epoch: [6][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1440e+00 (1.0139e+00)\n",
      "Epoch: [6][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0083e+00 (1.0145e+00)\n",
      "Epoch: [6][2250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.1009e-01 (1.0150e+00)\n",
      "Epoch: [6][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8011e-01 (1.0151e+00)\n",
      "Epoch: [6][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0659e+00 (1.0152e+00)\n",
      "Epoch: [6][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0740e+00 (1.0150e+00)\n",
      "Epoch: [6][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3574e+00 (1.0152e+00)\n",
      "Epoch: [6][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0766e+00 (1.0155e+00)\n",
      "Epoch: [6][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0975e-01 (1.0152e+00)\n",
      "Epoch: [6][2600/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1824e+00 (1.0153e+00)\n",
      "Epoch: [6][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2083e+00 (1.0155e+00)\n",
      "Epoch: [6][2700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0861e+00 (1.0157e+00)\n",
      "Epoch: [6][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0805e+00 (1.0157e+00)\n",
      "Epoch: [6][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1124e+00 (1.0159e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [6][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8013e-01 (1.0161e+00)\n",
      "Epoch: [6][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2902e-01 (1.0160e+00)\n",
      "Epoch: [6][2950/5005]\tTime  0.638 ( 0.638)\tData  0.001 ( 0.001)\tLoss 1.2412e+00 (1.0162e+00)\n",
      "Epoch: [6][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9990e-01 (1.0163e+00)\n",
      "Epoch: [6][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1353e+00 (1.0164e+00)\n",
      "Epoch: [6][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6675e-01 (1.0168e+00)\n",
      "Epoch: [6][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0543e+00 (1.0169e+00)\n",
      "Epoch: [6][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6385e-01 (1.0170e+00)\n",
      "Epoch: [6][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0627e+00 (1.0174e+00)\n",
      "Epoch: [6][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0163e+00 (1.0177e+00)\n",
      "Epoch: [6][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5025e-01 (1.0173e+00)\n",
      "Epoch: [6][3400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3123e+00 (1.0174e+00)\n",
      "Epoch: [6][3450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1078e+00 (1.0174e+00)\n",
      "Epoch: [6][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7679e-01 (1.0178e+00)\n",
      "Epoch: [6][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6708e-01 (1.0181e+00)\n",
      "Epoch: [6][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8614e-01 (1.0184e+00)\n",
      "Epoch: [6][3650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1918e+00 (1.0182e+00)\n",
      "Epoch: [6][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4959e-01 (1.0184e+00)\n",
      "Epoch: [6][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9780e-01 (1.0187e+00)\n",
      "Epoch: [6][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3441e-01 (1.0189e+00)\n",
      "Epoch: [6][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1966e+00 (1.0191e+00)\n",
      "Epoch: [6][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5430e-01 (1.0190e+00)\n",
      "Epoch: [6][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1052e+00 (1.0191e+00)\n",
      "Epoch: [6][4000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0532e+00 (1.0194e+00)\n",
      "Epoch: [6][4050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6916e-01 (1.0197e+00)\n",
      "Epoch: [6][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0613e+00 (1.0196e+00)\n",
      "Epoch: [6][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0108e+00 (1.0199e+00)\n",
      "Epoch: [6][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6263e-01 (1.0199e+00)\n",
      "Epoch: [6][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0369e+00 (1.0201e+00)\n",
      "Epoch: [6][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0743e+00 (1.0201e+00)\n",
      "Epoch: [6][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0922e+00 (1.0200e+00)\n",
      "Epoch: [6][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0403e+00 (1.0199e+00)\n",
      "Epoch: [6][4450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0850e+00 (1.0202e+00)\n",
      "Epoch: [6][4500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0134e+00 (1.0204e+00)\n",
      "Epoch: [6][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9456e-01 (1.0206e+00)\n",
      "Epoch: [6][4600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1797e+00 (1.0208e+00)\n",
      "Epoch: [6][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6276e-01 (1.0210e+00)\n",
      "Epoch: [6][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1749e+00 (1.0216e+00)\n",
      "Epoch: [6][4750/5005]\tTime  0.637 ( 0.638)\tData  0.001 ( 0.001)\tLoss 1.0679e+00 (1.0218e+00)\n",
      "Epoch: [6][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1014e+00 (1.0220e+00)\n",
      "Epoch: [6][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0008e+00 (1.0220e+00)\n",
      "Epoch: [6][4900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6445e-01 (1.0219e+00)\n",
      "Epoch: [6][4950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0024e+00 (1.0218e+00)\n",
      "Epoch: [6][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0628e+00 (1.0218e+00)\n",
      "Test: [  0/196]\tTime  3.503 ( 3.503)\tLoss 7.1913e-01 (7.1913e-01)\tAcc@1  80.08 ( 80.08)\tAcc@5  95.70 ( 95.70)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.493)\tLoss 5.5693e-01 (8.8086e-01)\tAcc@1  84.38 ( 76.56)\tAcc@5  96.88 ( 93.94)\n",
      "Test: [100/196]\tTime  0.434 ( 0.464)\tLoss 1.5077e+00 (1.0129e+00)\tAcc@1  62.89 ( 73.91)\tAcc@5  86.33 ( 92.33)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.2329e+00 (1.1311e+00)\tAcc@1  73.44 ( 71.70)\tAcc@5  87.89 ( 90.79)\n",
      "epoch 6 1.0218011855430376 70.69200134277344 0.006500000000000002 2344421 0.09999448081815601\n",
      "Epoch: [7][   0/5005]\tTime  3.172 ( 3.172)\tData  2.531 ( 2.531)\tLoss 1.0393e+00 (1.0393e+00)\n",
      "Epoch: [7][  50/5005]\tTime  0.637 ( 0.687)\tData  0.000 ( 0.050)\tLoss 9.8897e-01 (9.7305e-01)\n",
      "Epoch: [7][ 100/5005]\tTime  0.637 ( 0.662)\tData  0.000 ( 0.025)\tLoss 8.8794e-01 (9.9104e-01)\n",
      "Epoch: [7][ 150/5005]\tTime  0.638 ( 0.654)\tData  0.000 ( 0.017)\tLoss 9.1554e-01 (9.8867e-01)\n",
      "Epoch: [7][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.013)\tLoss 9.1868e-01 (9.9147e-01)\n",
      "Epoch: [7][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 9.0807e-01 (9.8587e-01)\n",
      "Epoch: [7][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.009)\tLoss 1.0225e+00 (9.8848e-01)\n",
      "Epoch: [7][ 350/5005]\tTime  0.638 ( 0.645)\tData  0.000 ( 0.007)\tLoss 1.0725e+00 (9.8928e-01)\n",
      "Epoch: [7][ 400/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 8.8852e-01 (9.9083e-01)\n",
      "Epoch: [7][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 9.9309e-01 (9.9169e-01)\n",
      "Epoch: [7][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.5559e-01 (9.9085e-01)\n",
      "Epoch: [7][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.1111e+00 (9.9311e-01)\n",
      "Epoch: [7][ 600/5005]\tTime  0.640 ( 0.642)\tData  0.000 ( 0.004)\tLoss 9.1031e-01 (9.9386e-01)\n",
      "Epoch: [7][ 650/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.1350e-01 (9.9436e-01)\n",
      "Epoch: [7][ 700/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0872e+00 (9.9411e-01)\n",
      "Epoch: [7][ 750/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.1371e+00 (9.9376e-01)\n",
      "Epoch: [7][ 800/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.6758e-01 (9.9292e-01)\n",
      "Epoch: [7][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0083e+00 (9.9193e-01)\n",
      "Epoch: [7][ 900/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1347e+00 (9.9296e-01)\n",
      "Epoch: [7][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.7396e-01 (9.9282e-01)\n",
      "Epoch: [7][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0649e+00 (9.9265e-01)\n",
      "Epoch: [7][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1427e+00 (9.9232e-01)\n",
      "Epoch: [7][1100/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.1532e-01 (9.9320e-01)\n",
      "Epoch: [7][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.9393e-01 (9.9430e-01)\n",
      "Epoch: [7][1200/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.9892e-01 (9.9549e-01)\n",
      "Epoch: [7][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0840e+00 (9.9646e-01)\n",
      "Epoch: [7][1300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0770e+00 (9.9711e-01)\n",
      "Epoch: [7][1350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4565e-01 (9.9690e-01)\n",
      "Epoch: [7][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.2245e+00 (9.9737e-01)\n",
      "Epoch: [7][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1579e+00 (9.9795e-01)\n",
      "Epoch: [7][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1280e+00 (9.9834e-01)\n",
      "Epoch: [7][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1152e+00 (9.9852e-01)\n",
      "Epoch: [7][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0347e-01 (9.9879e-01)\n",
      "Epoch: [7][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0486e+00 (9.9933e-01)\n",
      "Epoch: [7][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0636e+00 (9.9919e-01)\n",
      "Epoch: [7][1750/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4817e-01 (9.9976e-01)\n",
      "Epoch: [7][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5893e-01 (9.9993e-01)\n",
      "Epoch: [7][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3296e-01 (1.0001e+00)\n",
      "Epoch: [7][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1240e+00 (1.0005e+00)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [7][1950/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1193e+00 (1.0003e+00)\n",
      "Epoch: [7][2000/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0370e+00 (1.0005e+00)\n",
      "Epoch: [7][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.8148e-01 (9.9970e-01)\n",
      "Epoch: [7][2100/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1042e+00 (9.9941e-01)\n",
      "Epoch: [7][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0743e+00 (9.9948e-01)\n",
      "Epoch: [7][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1323e+00 (9.9991e-01)\n",
      "Epoch: [7][2250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.6945e-01 (9.9957e-01)\n",
      "Epoch: [7][2300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0754e+00 (1.0005e+00)\n",
      "Epoch: [7][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0206e+00 (9.9988e-01)\n",
      "Epoch: [7][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0108e+00 (9.9957e-01)\n",
      "Epoch: [7][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4905e-01 (1.0003e+00)\n",
      "Epoch: [7][2500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0961e+00 (1.0005e+00)\n",
      "Epoch: [7][2550/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2414e-01 (1.0007e+00)\n",
      "Epoch: [7][2600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0585e+00 (1.0013e+00)\n",
      "Epoch: [7][2650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9837e-01 (1.0015e+00)\n",
      "Epoch: [7][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8666e-01 (1.0016e+00)\n",
      "Epoch: [7][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9292e-01 (1.0021e+00)\n",
      "Epoch: [7][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8173e-01 (1.0024e+00)\n",
      "Epoch: [7][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1221e+00 (1.0026e+00)\n",
      "Epoch: [7][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2046e+00 (1.0031e+00)\n",
      "Epoch: [7][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1435e+00 (1.0033e+00)\n",
      "Epoch: [7][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5020e-01 (1.0034e+00)\n",
      "Epoch: [7][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5561e-01 (1.0035e+00)\n",
      "Epoch: [7][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0179e+00 (1.0036e+00)\n",
      "Epoch: [7][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7906e-01 (1.0037e+00)\n",
      "Epoch: [7][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0181e+00 (1.0040e+00)\n",
      "Epoch: [7][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0518e+00 (1.0039e+00)\n",
      "Epoch: [7][3300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7688e-01 (1.0039e+00)\n",
      "Epoch: [7][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8996e-01 (1.0041e+00)\n",
      "Epoch: [7][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2574e+00 (1.0041e+00)\n",
      "Epoch: [7][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8842e-01 (1.0038e+00)\n",
      "Epoch: [7][3500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7220e-01 (1.0039e+00)\n",
      "Epoch: [7][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1425e+00 (1.0043e+00)\n",
      "Epoch: [7][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1615e+00 (1.0045e+00)\n",
      "Epoch: [7][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3764e-01 (1.0045e+00)\n",
      "Epoch: [7][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8586e-01 (1.0048e+00)\n",
      "Epoch: [7][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3254e-01 (1.0051e+00)\n",
      "Epoch: [7][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1154e+00 (1.0055e+00)\n",
      "Epoch: [7][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7987e-01 (1.0056e+00)\n",
      "Epoch: [7][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9860e-01 (1.0059e+00)\n",
      "Epoch: [7][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0230e+00 (1.0059e+00)\n",
      "Epoch: [7][4000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0743e+00 (1.0062e+00)\n",
      "Epoch: [7][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8941e-01 (1.0063e+00)\n",
      "Epoch: [7][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8638e-01 (1.0062e+00)\n",
      "Epoch: [7][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1013e+00 (1.0065e+00)\n",
      "Epoch: [7][4200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9646e-01 (1.0069e+00)\n",
      "Epoch: [7][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6596e-01 (1.0071e+00)\n",
      "Epoch: [7][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9585e-01 (1.0072e+00)\n",
      "Epoch: [7][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0742e+00 (1.0074e+00)\n",
      "Epoch: [7][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0644e+00 (1.0074e+00)\n",
      "Epoch: [7][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0381e+00 (1.0074e+00)\n",
      "Epoch: [7][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0266e-01 (1.0079e+00)\n",
      "Epoch: [7][4550/5005]\tTime  0.640 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0475e+00 (1.0079e+00)\n",
      "Epoch: [7][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1158e-01 (1.0080e+00)\n",
      "Epoch: [7][4650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0922e+00 (1.0081e+00)\n",
      "Epoch: [7][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0676e+00 (1.0084e+00)\n",
      "Epoch: [7][4750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0458e+00 (1.0086e+00)\n",
      "Epoch: [7][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8915e-01 (1.0087e+00)\n",
      "Epoch: [7][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0321e+00 (1.0085e+00)\n",
      "Epoch: [7][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3163e-01 (1.0091e+00)\n",
      "Epoch: [7][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9808e-01 (1.0093e+00)\n",
      "Epoch: [7][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2505e-01 (1.0095e+00)\n",
      "Test: [  0/196]\tTime  3.490 ( 3.490)\tLoss 7.0458e-01 (7.0458e-01)\tAcc@1  81.25 ( 81.25)\tAcc@5  95.31 ( 95.31)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.493)\tLoss 4.9056e-01 (8.4023e-01)\tAcc@1  86.72 ( 77.79)\tAcc@5  96.09 ( 94.28)\n",
      "Test: [100/196]\tTime  0.434 ( 0.464)\tLoss 1.3732e+00 (9.6534e-01)\tAcc@1  63.67 ( 75.07)\tAcc@5  86.33 ( 92.75)\n",
      "Test: [150/196]\tTime  0.433 ( 0.454)\tLoss 1.2302e+00 (1.0923e+00)\tAcc@1  71.09 ( 72.42)\tAcc@5  88.28 ( 91.08)\n",
      "epoch 7 1.0094827453433504 71.22999572753906 0.006000000000000002 2344421 0.09999448081815601\n",
      "Epoch: [8][   0/5005]\tTime  3.044 ( 3.044)\tData  2.407 ( 2.407)\tLoss 9.3417e-01 (9.3417e-01)\n",
      "Epoch: [8][  50/5005]\tTime  0.637 ( 0.685)\tData  0.000 ( 0.047)\tLoss 8.9566e-01 (9.7295e-01)\n",
      "Epoch: [8][ 100/5005]\tTime  0.637 ( 0.661)\tData  0.000 ( 0.024)\tLoss 8.9802e-01 (9.7577e-01)\n",
      "Epoch: [8][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.016)\tLoss 9.9527e-01 (9.7748e-01)\n",
      "Epoch: [8][ 200/5005]\tTime  0.637 ( 0.649)\tData  0.000 ( 0.012)\tLoss 8.9810e-01 (9.7212e-01)\n",
      "Epoch: [8][ 250/5005]\tTime  0.638 ( 0.647)\tData  0.000 ( 0.010)\tLoss 9.8186e-01 (9.7277e-01)\n",
      "Epoch: [8][ 300/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 9.1500e-01 (9.7720e-01)\n",
      "Epoch: [8][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 9.2113e-01 (9.7741e-01)\n",
      "Epoch: [8][ 400/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.006)\tLoss 9.2911e-01 (9.7587e-01)\n",
      "Epoch: [8][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 1.0075e+00 (9.7977e-01)\n",
      "Epoch: [8][ 500/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.3123e-01 (9.7946e-01)\n",
      "Epoch: [8][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.0713e+00 (9.7854e-01)\n",
      "Epoch: [8][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.3960e-01 (9.7851e-01)\n",
      "Epoch: [8][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.4399e-01 (9.7830e-01)\n",
      "Epoch: [8][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0313e+00 (9.7738e-01)\n",
      "Epoch: [8][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 9.0920e-01 (9.7888e-01)\n",
      "Epoch: [8][ 800/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1427e+00 (9.7950e-01)\n",
      "Epoch: [8][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0093e+00 (9.7978e-01)\n",
      "Epoch: [8][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.6811e-01 (9.8018e-01)\n",
      "Epoch: [8][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0662e+00 (9.7970e-01)\n",
      "Epoch: [8][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0869e+00 (9.7999e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [8][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.6705e-01 (9.7999e-01)\n",
      "Epoch: [8][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.9749e-01 (9.8046e-01)\n",
      "Epoch: [8][1150/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1081e-01 (9.8200e-01)\n",
      "Epoch: [8][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0195e+00 (9.8216e-01)\n",
      "Epoch: [8][1250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1199e-01 (9.8229e-01)\n",
      "Epoch: [8][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.2310e+00 (9.8390e-01)\n",
      "Epoch: [8][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.8865e-01 (9.8495e-01)\n",
      "Epoch: [8][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0421e+00 (9.8544e-01)\n",
      "Epoch: [8][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0533e+00 (9.8536e-01)\n",
      "Epoch: [8][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0520e+00 (9.8587e-01)\n",
      "Epoch: [8][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3925e-01 (9.8623e-01)\n",
      "Epoch: [8][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9668e-01 (9.8600e-01)\n",
      "Epoch: [8][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0063e+00 (9.8632e-01)\n",
      "Epoch: [8][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8346e-01 (9.8643e-01)\n",
      "Epoch: [8][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0183e+00 (9.8649e-01)\n",
      "Epoch: [8][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0176e-01 (9.8683e-01)\n",
      "Epoch: [8][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0541e+00 (9.8713e-01)\n",
      "Epoch: [8][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.7786e-01 (9.8716e-01)\n",
      "Epoch: [8][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0762e+00 (9.8750e-01)\n",
      "Epoch: [8][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0959e+00 (9.8826e-01)\n",
      "Epoch: [8][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0039e+00 (9.8858e-01)\n",
      "Epoch: [8][2100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1275e+00 (9.8903e-01)\n",
      "Epoch: [8][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9509e-01 (9.8839e-01)\n",
      "Epoch: [8][2200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1032e+00 (9.8882e-01)\n",
      "Epoch: [8][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0207e+00 (9.8888e-01)\n",
      "Epoch: [8][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4901e-01 (9.8851e-01)\n",
      "Epoch: [8][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3648e-01 (9.8849e-01)\n",
      "Epoch: [8][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2823e-01 (9.8857e-01)\n",
      "Epoch: [8][2450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8878e-01 (9.8830e-01)\n",
      "Epoch: [8][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7235e-01 (9.8805e-01)\n",
      "Epoch: [8][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0643e+00 (9.8852e-01)\n",
      "Epoch: [8][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2982e+00 (9.8871e-01)\n",
      "Epoch: [8][2650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2432e+00 (9.8920e-01)\n",
      "Epoch: [8][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3297e-01 (9.8932e-01)\n",
      "Epoch: [8][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1146e+00 (9.8931e-01)\n",
      "Epoch: [8][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8513e-01 (9.8920e-01)\n",
      "Epoch: [8][2850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0454e+00 (9.8903e-01)\n",
      "Epoch: [8][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2795e-01 (9.8925e-01)\n",
      "Epoch: [8][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2201e-01 (9.8956e-01)\n",
      "Epoch: [8][3000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4952e-01 (9.8993e-01)\n",
      "Epoch: [8][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2561e-01 (9.9027e-01)\n",
      "Epoch: [8][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1700e-01 (9.9065e-01)\n",
      "Epoch: [8][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7310e-01 (9.9096e-01)\n",
      "Epoch: [8][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7595e-01 (9.9110e-01)\n",
      "Epoch: [8][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8683e-01 (9.9101e-01)\n",
      "Epoch: [8][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3461e-01 (9.9106e-01)\n",
      "Epoch: [8][3350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6226e-01 (9.9127e-01)\n",
      "Epoch: [8][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4043e-01 (9.9141e-01)\n",
      "Epoch: [8][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0425e+00 (9.9186e-01)\n",
      "Epoch: [8][3500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1799e+00 (9.9220e-01)\n",
      "Epoch: [8][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0272e+00 (9.9249e-01)\n",
      "Epoch: [8][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6223e-01 (9.9276e-01)\n",
      "Epoch: [8][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7419e-01 (9.9292e-01)\n",
      "Epoch: [8][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0983e+00 (9.9341e-01)\n",
      "Epoch: [8][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6278e-01 (9.9401e-01)\n",
      "Epoch: [8][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0565e+00 (9.9425e-01)\n",
      "Epoch: [8][3850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8450e-01 (9.9436e-01)\n",
      "Epoch: [8][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9146e-01 (9.9481e-01)\n",
      "Epoch: [8][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4784e-01 (9.9495e-01)\n",
      "Epoch: [8][4000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2286e-01 (9.9496e-01)\n",
      "Epoch: [8][4050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0031e+00 (9.9496e-01)\n",
      "Epoch: [8][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1347e+00 (9.9499e-01)\n",
      "Epoch: [8][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1193e+00 (9.9508e-01)\n",
      "Epoch: [8][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2088e-01 (9.9522e-01)\n",
      "Epoch: [8][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1670e+00 (9.9522e-01)\n",
      "Epoch: [8][4300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1299e+00 (9.9526e-01)\n",
      "Epoch: [8][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0535e+00 (9.9523e-01)\n",
      "Epoch: [8][4400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6152e-01 (9.9548e-01)\n",
      "Epoch: [8][4450/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1311e+00 (9.9597e-01)\n",
      "Epoch: [8][4500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0141e+00 (9.9606e-01)\n",
      "Epoch: [8][4550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1848e+00 (9.9599e-01)\n",
      "Epoch: [8][4600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2167e-01 (9.9600e-01)\n",
      "Epoch: [8][4650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0035e+00 (9.9613e-01)\n",
      "Epoch: [8][4700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6189e-01 (9.9614e-01)\n",
      "Epoch: [8][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2904e-01 (9.9599e-01)\n",
      "Epoch: [8][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8651e-01 (9.9633e-01)\n",
      "Epoch: [8][4850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0193e+00 (9.9628e-01)\n",
      "Epoch: [8][4900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4673e-01 (9.9633e-01)\n",
      "Epoch: [8][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3110e-01 (9.9655e-01)\n",
      "Epoch: [8][5000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1359e+00 (9.9673e-01)\n",
      "Test: [  0/196]\tTime  3.518 ( 3.518)\tLoss 6.3022e-01 (6.3022e-01)\tAcc@1  81.25 ( 81.25)\tAcc@5  96.88 ( 96.88)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.493)\tLoss 5.1537e-01 (8.2896e-01)\tAcc@1  86.33 ( 78.09)\tAcc@5  96.48 ( 94.42)\n",
      "Test: [100/196]\tTime  0.434 ( 0.464)\tLoss 1.4033e+00 (9.6456e-01)\tAcc@1  61.72 ( 74.82)\tAcc@5  86.33 ( 92.76)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.2449e+00 (1.0759e+00)\tAcc@1  72.27 ( 72.70)\tAcc@5  88.28 ( 91.24)\n",
      "epoch 8 0.9967551069475246 71.55799865722656 0.005500000000000002 2344421 0.09999448081815601\n",
      "Epoch: [9][   0/5005]\tTime  3.223 ( 3.223)\tData  2.580 ( 2.580)\tLoss 9.8101e-01 (9.8101e-01)\n",
      "Epoch: [9][  50/5005]\tTime  0.638 ( 0.689)\tData  0.000 ( 0.051)\tLoss 9.2945e-01 (9.6202e-01)\n",
      "Epoch: [9][ 100/5005]\tTime  0.638 ( 0.664)\tData  0.000 ( 0.026)\tLoss 9.3823e-01 (9.4928e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [9][ 150/5005]\tTime  0.638 ( 0.655)\tData  0.000 ( 0.017)\tLoss 9.5540e-01 (9.5639e-01)\n",
      "Epoch: [9][ 200/5005]\tTime  0.638 ( 0.651)\tData  0.000 ( 0.013)\tLoss 1.0515e+00 (9.6242e-01)\n",
      "Epoch: [9][ 250/5005]\tTime  0.637 ( 0.648)\tData  0.000 ( 0.011)\tLoss 9.6656e-01 (9.6348e-01)\n",
      "Epoch: [9][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.009)\tLoss 8.5081e-01 (9.6400e-01)\n",
      "Epoch: [9][ 350/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 9.3382e-01 (9.6533e-01)\n",
      "Epoch: [9][ 400/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.007)\tLoss 1.0575e+00 (9.6569e-01)\n",
      "Epoch: [9][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 1.0468e+00 (9.6848e-01)\n",
      "Epoch: [9][ 500/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.005)\tLoss 9.6286e-01 (9.6538e-01)\n",
      "Epoch: [9][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.7891e-01 (9.6417e-01)\n",
      "Epoch: [9][ 600/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.0115e+00 (9.6547e-01)\n",
      "Epoch: [9][ 650/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 1.0936e+00 (9.6447e-01)\n",
      "Epoch: [9][ 700/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.5736e-01 (9.6467e-01)\n",
      "Epoch: [9][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.4894e-01 (9.6641e-01)\n",
      "Epoch: [9][ 800/5005]\tTime  0.636 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.6476e-01 (9.6733e-01)\n",
      "Epoch: [9][ 850/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.003)\tLoss 1.0014e+00 (9.6760e-01)\n",
      "Epoch: [9][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.7562e-01 (9.6893e-01)\n",
      "Epoch: [9][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0956e+00 (9.6893e-01)\n",
      "Epoch: [9][1000/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0019e+00 (9.6938e-01)\n",
      "Epoch: [9][1050/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.4802e-01 (9.6883e-01)\n",
      "Epoch: [9][1100/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0287e+00 (9.6937e-01)\n",
      "Epoch: [9][1150/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.1368e-01 (9.6925e-01)\n",
      "Epoch: [9][1200/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.1660e+00 (9.6890e-01)\n",
      "Epoch: [9][1250/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.0103e+00 (9.7031e-01)\n",
      "Epoch: [9][1300/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.0989e+00 (9.7016e-01)\n",
      "Epoch: [9][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5885e-01 (9.7113e-01)\n",
      "Epoch: [9][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0196e+00 (9.7155e-01)\n",
      "Epoch: [9][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.7007e-01 (9.7260e-01)\n",
      "Epoch: [9][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3797e-01 (9.7274e-01)\n",
      "Epoch: [9][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6496e-01 (9.7285e-01)\n",
      "Epoch: [9][1600/5005]\tTime  0.639 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0988e+00 (9.7315e-01)\n",
      "Epoch: [9][1650/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.7748e-01 (9.7297e-01)\n",
      "Epoch: [9][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5946e-01 (9.7340e-01)\n",
      "Epoch: [9][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.9517e-01 (9.7331e-01)\n",
      "Epoch: [9][1800/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5689e-01 (9.7360e-01)\n",
      "Epoch: [9][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.1851e-01 (9.7335e-01)\n",
      "Epoch: [9][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1268e+00 (9.7387e-01)\n",
      "Epoch: [9][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.2619e+00 (9.7444e-01)\n",
      "Epoch: [9][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.2031e+00 (9.7484e-01)\n",
      "Epoch: [9][2050/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0463e+00 (9.7526e-01)\n",
      "Epoch: [9][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.8119e-01 (9.7541e-01)\n",
      "Epoch: [9][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.7069e-01 (9.7559e-01)\n",
      "Epoch: [9][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0228e+00 (9.7538e-01)\n",
      "Epoch: [9][2250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0510e+00 (9.7565e-01)\n",
      "Epoch: [9][2300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1318e+00 (9.7578e-01)\n",
      "Epoch: [9][2350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1678e+00 (9.7586e-01)\n",
      "Epoch: [9][2400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.5476e-01 (9.7621e-01)\n",
      "Epoch: [9][2450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0006e+00 (9.7688e-01)\n",
      "Epoch: [9][2500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.4386e-01 (9.7663e-01)\n",
      "Epoch: [9][2550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1125e+00 (9.7698e-01)\n",
      "Epoch: [9][2600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.1649e-01 (9.7732e-01)\n",
      "Epoch: [9][2650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1414e+00 (9.7754e-01)\n",
      "Epoch: [9][2700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.3708e-01 (9.7735e-01)\n",
      "Epoch: [9][2750/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.6847e-01 (9.7776e-01)\n",
      "Epoch: [9][2800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.0880e-01 (9.7765e-01)\n",
      "Epoch: [9][2850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.5928e-01 (9.7788e-01)\n",
      "Epoch: [9][2900/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0191e+00 (9.7843e-01)\n",
      "Epoch: [9][2950/5005]\tTime  0.639 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1177e+00 (9.7851e-01)\n",
      "Epoch: [9][3000/5005]\tTime  0.640 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2518e+00 (9.7876e-01)\n",
      "Epoch: [9][3050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9952e-01 (9.7900e-01)\n",
      "Epoch: [9][3100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0236e+00 (9.7923e-01)\n",
      "Epoch: [9][3150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4933e-01 (9.7955e-01)\n",
      "Epoch: [9][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6220e-01 (9.7984e-01)\n",
      "Epoch: [9][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4894e-01 (9.8026e-01)\n",
      "Epoch: [9][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0487e+00 (9.8003e-01)\n",
      "Epoch: [9][3350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0525e+00 (9.8034e-01)\n",
      "Epoch: [9][3400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3305e-01 (9.8053e-01)\n",
      "Epoch: [9][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0957e+00 (9.8083e-01)\n",
      "Epoch: [9][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8180e-01 (9.8093e-01)\n",
      "Epoch: [9][3550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0155e+00 (9.8123e-01)\n",
      "Epoch: [9][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1684e+00 (9.8145e-01)\n",
      "Epoch: [9][3650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9279e-01 (9.8151e-01)\n",
      "Epoch: [9][3700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1104e+00 (9.8158e-01)\n",
      "Epoch: [9][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5871e-01 (9.8172e-01)\n",
      "Epoch: [9][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4583e-01 (9.8134e-01)\n",
      "Epoch: [9][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0915e+00 (9.8151e-01)\n",
      "Epoch: [9][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3298e-01 (9.8190e-01)\n",
      "Epoch: [9][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0035e+00 (9.8210e-01)\n",
      "Epoch: [9][4000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9435e-01 (9.8223e-01)\n",
      "Epoch: [9][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9510e-01 (9.8225e-01)\n",
      "Epoch: [9][4100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1364e+00 (9.8233e-01)\n",
      "Epoch: [9][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0835e+00 (9.8235e-01)\n",
      "Epoch: [9][4200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6967e-01 (9.8222e-01)\n",
      "Epoch: [9][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3893e-01 (9.8209e-01)\n",
      "Epoch: [9][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1666e-01 (9.8197e-01)\n",
      "Epoch: [9][4350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3733e-01 (9.8203e-01)\n",
      "Epoch: [9][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7457e-01 (9.8213e-01)\n",
      "Epoch: [9][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9449e-01 (9.8222e-01)\n",
      "Epoch: [9][4500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0348e+00 (9.8230e-01)\n",
      "Epoch: [9][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1587e+00 (9.8253e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [9][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3564e-01 (9.8267e-01)\n",
      "Epoch: [9][4650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0953e+00 (9.8295e-01)\n",
      "Epoch: [9][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0452e+00 (9.8319e-01)\n",
      "Epoch: [9][4750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6148e-01 (9.8353e-01)\n",
      "Epoch: [9][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2109e-01 (9.8392e-01)\n",
      "Epoch: [9][4850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9109e-01 (9.8402e-01)\n",
      "Epoch: [9][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8081e-01 (9.8410e-01)\n",
      "Epoch: [9][4950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4501e-01 (9.8419e-01)\n",
      "Epoch: [9][5000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1544e+00 (9.8443e-01)\n",
      "Test: [  0/196]\tTime  3.412 ( 3.412)\tLoss 6.5012e-01 (6.5012e-01)\tAcc@1  79.30 ( 79.30)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.491)\tLoss 5.2254e-01 (8.3769e-01)\tAcc@1  84.38 ( 77.75)\tAcc@5  97.66 ( 94.48)\n",
      "Test: [100/196]\tTime  0.434 ( 0.463)\tLoss 1.4613e+00 (9.7508e-01)\tAcc@1  62.50 ( 74.72)\tAcc@5  86.72 ( 92.78)\n",
      "Test: [150/196]\tTime  0.434 ( 0.453)\tLoss 1.2733e+00 (1.0963e+00)\tAcc@1  73.83 ( 72.37)\tAcc@5  87.50 ( 91.06)\n",
      "epoch 9 0.98448203479906 71.33200073242188 0.005000000000000002 2344421 0.09999448081815601\n",
      "Epoch: [10][   0/5005]\tTime  3.097 ( 3.097)\tData  2.453 ( 2.453)\tLoss 1.0983e+00 (1.0983e+00)\n",
      "Epoch: [10][  50/5005]\tTime  0.637 ( 0.686)\tData  0.000 ( 0.048)\tLoss 9.9973e-01 (9.7459e-01)\n",
      "Epoch: [10][ 100/5005]\tTime  0.638 ( 0.662)\tData  0.000 ( 0.024)\tLoss 1.0190e+00 (9.5921e-01)\n",
      "Epoch: [10][ 150/5005]\tTime  0.638 ( 0.654)\tData  0.000 ( 0.016)\tLoss 9.5412e-01 (9.5736e-01)\n",
      "Epoch: [10][ 200/5005]\tTime  0.638 ( 0.650)\tData  0.000 ( 0.012)\tLoss 9.2227e-01 (9.5683e-01)\n",
      "Epoch: [10][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 1.0381e+00 (9.5195e-01)\n",
      "Epoch: [10][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.008)\tLoss 1.0589e+00 (9.5019e-01)\n",
      "Epoch: [10][ 350/5005]\tTime  0.638 ( 0.645)\tData  0.000 ( 0.007)\tLoss 1.1406e+00 (9.4953e-01)\n",
      "Epoch: [10][ 400/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.006)\tLoss 8.2484e-01 (9.5147e-01)\n",
      "Epoch: [10][ 450/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.006)\tLoss 1.1224e+00 (9.5482e-01)\n",
      "Epoch: [10][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.0128e-01 (9.5640e-01)\n",
      "Epoch: [10][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.1073e+00 (9.5467e-01)\n",
      "Epoch: [10][ 600/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.004)\tLoss 1.0453e+00 (9.5439e-01)\n",
      "Epoch: [10][ 650/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.4329e-01 (9.5644e-01)\n",
      "Epoch: [10][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.5235e-01 (9.5560e-01)\n",
      "Epoch: [10][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 9.5493e-01 (9.5622e-01)\n",
      "Epoch: [10][ 800/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.1252e-01 (9.5591e-01)\n",
      "Epoch: [10][ 850/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.8732e-01 (9.5554e-01)\n",
      "Epoch: [10][ 900/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.5855e-01 (9.5543e-01)\n",
      "Epoch: [10][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0438e+00 (9.5660e-01)\n",
      "Epoch: [10][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0085e+00 (9.5639e-01)\n",
      "Epoch: [10][1050/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.1950e-01 (9.5626e-01)\n",
      "Epoch: [10][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.6382e-01 (9.5728e-01)\n",
      "Epoch: [10][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 7.3779e-01 (9.5633e-01)\n",
      "Epoch: [10][1200/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.8990e-01 (9.5655e-01)\n",
      "Epoch: [10][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9190e-01 (9.5663e-01)\n",
      "Epoch: [10][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2222e-01 (9.5727e-01)\n",
      "Epoch: [10][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7654e-01 (9.5795e-01)\n",
      "Epoch: [10][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9686e-01 (9.5810e-01)\n",
      "Epoch: [10][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0419e+00 (9.5864e-01)\n",
      "Epoch: [10][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9233e-01 (9.5950e-01)\n",
      "Epoch: [10][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1606e+00 (9.6082e-01)\n",
      "Epoch: [10][1600/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1418e+00 (9.6128e-01)\n",
      "Epoch: [10][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9412e-01 (9.6144e-01)\n",
      "Epoch: [10][1700/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0529e+00 (9.6211e-01)\n",
      "Epoch: [10][1750/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0287e-01 (9.6304e-01)\n",
      "Epoch: [10][1800/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2720e-01 (9.6370e-01)\n",
      "Epoch: [10][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0874e+00 (9.6422e-01)\n",
      "Epoch: [10][1900/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.3848e-01 (9.6438e-01)\n",
      "Epoch: [10][1950/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1024e+00 (9.6465e-01)\n",
      "Epoch: [10][2000/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0148e+00 (9.6459e-01)\n",
      "Epoch: [10][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.3651e-01 (9.6448e-01)\n",
      "Epoch: [10][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0766e+00 (9.6439e-01)\n",
      "Epoch: [10][2150/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.9828e-01 (9.6468e-01)\n",
      "Epoch: [10][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1899e+00 (9.6486e-01)\n",
      "Epoch: [10][2250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.0774e-01 (9.6535e-01)\n",
      "Epoch: [10][2300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0077e+00 (9.6496e-01)\n",
      "Epoch: [10][2350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.5851e-01 (9.6482e-01)\n",
      "Epoch: [10][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1741e+00 (9.6491e-01)\n",
      "Epoch: [10][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1103e+00 (9.6503e-01)\n",
      "Epoch: [10][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1517e-01 (9.6531e-01)\n",
      "Epoch: [10][2550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2559e+00 (9.6556e-01)\n",
      "Epoch: [10][2600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1393e+00 (9.6601e-01)\n",
      "Epoch: [10][2650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1032e-01 (9.6576e-01)\n",
      "Epoch: [10][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1204e+00 (9.6578e-01)\n",
      "Epoch: [10][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4904e-01 (9.6578e-01)\n",
      "Epoch: [10][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5630e-01 (9.6616e-01)\n",
      "Epoch: [10][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0792e-01 (9.6662e-01)\n",
      "Epoch: [10][2900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2006e-01 (9.6675e-01)\n",
      "Epoch: [10][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4426e-01 (9.6711e-01)\n",
      "Epoch: [10][3000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0053e-01 (9.6706e-01)\n",
      "Epoch: [10][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1041e+00 (9.6734e-01)\n",
      "Epoch: [10][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0156e+00 (9.6728e-01)\n",
      "Epoch: [10][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0056e+00 (9.6749e-01)\n",
      "Epoch: [10][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4990e-01 (9.6761e-01)\n",
      "Epoch: [10][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7410e-01 (9.6786e-01)\n",
      "Epoch: [10][3300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1007e+00 (9.6819e-01)\n",
      "Epoch: [10][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0527e+00 (9.6801e-01)\n",
      "Epoch: [10][3400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0037e+00 (9.6830e-01)\n",
      "Epoch: [10][3450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0796e+00 (9.6835e-01)\n",
      "Epoch: [10][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0668e-01 (9.6863e-01)\n",
      "Epoch: [10][3550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0004e+00 (9.6920e-01)\n",
      "Epoch: [10][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4760e-01 (9.6892e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [10][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1239e+00 (9.6919e-01)\n",
      "Epoch: [10][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0026e+00 (9.6962e-01)\n",
      "Epoch: [10][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5949e-01 (9.6990e-01)\n",
      "Epoch: [10][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9647e-01 (9.7005e-01)\n",
      "Epoch: [10][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4839e-01 (9.7045e-01)\n",
      "Epoch: [10][3900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4441e-01 (9.7062e-01)\n",
      "Epoch: [10][3950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5143e-01 (9.7079e-01)\n",
      "Epoch: [10][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0597e+00 (9.7080e-01)\n",
      "Epoch: [10][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0038e+00 (9.7068e-01)\n",
      "Epoch: [10][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1007e-01 (9.7065e-01)\n",
      "Epoch: [10][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1214e+00 (9.7070e-01)\n",
      "Epoch: [10][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0470e+00 (9.7082e-01)\n",
      "Epoch: [10][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8605e-01 (9.7078e-01)\n",
      "Epoch: [10][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1315e-01 (9.7071e-01)\n",
      "Epoch: [10][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0444e+00 (9.7074e-01)\n",
      "Epoch: [10][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0694e-01 (9.7095e-01)\n",
      "Epoch: [10][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7938e-01 (9.7087e-01)\n",
      "Epoch: [10][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7989e-01 (9.7102e-01)\n",
      "Epoch: [10][4550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9803e-01 (9.7122e-01)\n",
      "Epoch: [10][4600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7853e-01 (9.7140e-01)\n",
      "Epoch: [10][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5966e-01 (9.7138e-01)\n",
      "Epoch: [10][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9212e-01 (9.7159e-01)\n",
      "Epoch: [10][4750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9715e-01 (9.7159e-01)\n",
      "Epoch: [10][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6708e-01 (9.7140e-01)\n",
      "Epoch: [10][4850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6899e-01 (9.7152e-01)\n",
      "Epoch: [10][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9329e-01 (9.7167e-01)\n",
      "Epoch: [10][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8746e-01 (9.7168e-01)\n",
      "Epoch: [10][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0867e+00 (9.7176e-01)\n",
      "Test: [  0/196]\tTime  3.437 ( 3.437)\tLoss 5.9677e-01 (5.9677e-01)\tAcc@1  82.81 ( 82.81)\tAcc@5  97.66 ( 97.66)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.492)\tLoss 5.9691e-01 (8.3992e-01)\tAcc@1  83.98 ( 77.51)\tAcc@5  96.88 ( 94.17)\n",
      "Test: [100/196]\tTime  0.434 ( 0.463)\tLoss 1.4662e+00 (9.5802e-01)\tAcc@1  58.98 ( 75.13)\tAcc@5  84.38 ( 92.87)\n",
      "Test: [150/196]\tTime  0.434 ( 0.453)\tLoss 1.2071e+00 (1.0820e+00)\tAcc@1  75.00 ( 72.72)\tAcc@5  87.89 ( 91.26)\n",
      "epoch 10 0.9717668149622368 71.77399444580078 0.004500000000000001 2344421 0.09999448081815601\n",
      "Epoch: [11][   0/5005]\tTime  3.064 ( 3.064)\tData  2.425 ( 2.425)\tLoss 1.1245e+00 (1.1245e+00)\n",
      "Epoch: [11][  50/5005]\tTime  0.637 ( 0.685)\tData  0.000 ( 0.048)\tLoss 1.2623e+00 (9.3338e-01)\n",
      "Epoch: [11][ 100/5005]\tTime  0.637 ( 0.662)\tData  0.000 ( 0.024)\tLoss 1.0060e+00 (9.3745e-01)\n",
      "Epoch: [11][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.016)\tLoss 9.8594e-01 (9.3337e-01)\n",
      "Epoch: [11][ 200/5005]\tTime  0.638 ( 0.650)\tData  0.000 ( 0.012)\tLoss 8.4418e-01 (9.3741e-01)\n",
      "Epoch: [11][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 8.9625e-01 (9.3753e-01)\n",
      "Epoch: [11][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.008)\tLoss 1.0823e+00 (9.3902e-01)\n",
      "Epoch: [11][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 1.1169e+00 (9.4547e-01)\n",
      "Epoch: [11][ 400/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.006)\tLoss 9.1921e-01 (9.4627e-01)\n",
      "Epoch: [11][ 450/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.006)\tLoss 9.2658e-01 (9.4622e-01)\n",
      "Epoch: [11][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.6982e-01 (9.4675e-01)\n",
      "Epoch: [11][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.6116e-01 (9.4592e-01)\n",
      "Epoch: [11][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.1207e-01 (9.4673e-01)\n",
      "Epoch: [11][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.9837e-01 (9.4721e-01)\n",
      "Epoch: [11][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.9776e-01 (9.4606e-01)\n",
      "Epoch: [11][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.3916e-01 (9.4611e-01)\n",
      "Epoch: [11][ 800/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.9091e-01 (9.4521e-01)\n",
      "Epoch: [11][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.7953e-01 (9.4377e-01)\n",
      "Epoch: [11][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.4874e-01 (9.4356e-01)\n",
      "Epoch: [11][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0251e+00 (9.4341e-01)\n",
      "Epoch: [11][1000/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.2081e-01 (9.4427e-01)\n",
      "Epoch: [11][1050/5005]\tTime  0.639 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.3806e-01 (9.4394e-01)\n",
      "Epoch: [11][1100/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.8363e-01 (9.4354e-01)\n",
      "Epoch: [11][1150/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.0562e+00 (9.4405e-01)\n",
      "Epoch: [11][1200/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.5070e-01 (9.4359e-01)\n",
      "Epoch: [11][1250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0539e-01 (9.4296e-01)\n",
      "Epoch: [11][1300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0016e+00 (9.4349e-01)\n",
      "Epoch: [11][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.2430e+00 (9.4327e-01)\n",
      "Epoch: [11][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.5670e-01 (9.4353e-01)\n",
      "Epoch: [11][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0273e+00 (9.4340e-01)\n",
      "Epoch: [11][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0483e+00 (9.4398e-01)\n",
      "Epoch: [11][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4568e-01 (9.4380e-01)\n",
      "Epoch: [11][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0061e+00 (9.4417e-01)\n",
      "Epoch: [11][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0410e+00 (9.4353e-01)\n",
      "Epoch: [11][1700/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1111e-01 (9.4363e-01)\n",
      "Epoch: [11][1750/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.6985e-01 (9.4357e-01)\n",
      "Epoch: [11][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0886e+00 (9.4356e-01)\n",
      "Epoch: [11][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.8695e-01 (9.4419e-01)\n",
      "Epoch: [11][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.8531e-01 (9.4499e-01)\n",
      "Epoch: [11][1950/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.4960e-01 (9.4558e-01)\n",
      "Epoch: [11][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.3078e-01 (9.4615e-01)\n",
      "Epoch: [11][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0516e+00 (9.4618e-01)\n",
      "Epoch: [11][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.9660e-01 (9.4657e-01)\n",
      "Epoch: [11][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.2260e-01 (9.4705e-01)\n",
      "Epoch: [11][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.8499e-01 (9.4696e-01)\n",
      "Epoch: [11][2250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.5792e-01 (9.4730e-01)\n",
      "Epoch: [11][2300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.5746e-01 (9.4744e-01)\n",
      "Epoch: [11][2350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.9863e-01 (9.4745e-01)\n",
      "Epoch: [11][2400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9058e-01 (9.4773e-01)\n",
      "Epoch: [11][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1199e+00 (9.4790e-01)\n",
      "Epoch: [11][2500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6923e-01 (9.4794e-01)\n",
      "Epoch: [11][2550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9556e-01 (9.4798e-01)\n",
      "Epoch: [11][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4469e-01 (9.4862e-01)\n",
      "Epoch: [11][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0863e-01 (9.4842e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [11][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5277e-01 (9.4854e-01)\n",
      "Epoch: [11][2750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7549e-01 (9.4856e-01)\n",
      "Epoch: [11][2800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0931e+00 (9.4895e-01)\n",
      "Epoch: [11][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7054e-01 (9.4872e-01)\n",
      "Epoch: [11][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0971e-01 (9.4896e-01)\n",
      "Epoch: [11][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0648e+00 (9.4903e-01)\n",
      "Epoch: [11][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6670e-01 (9.4960e-01)\n",
      "Epoch: [11][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0056e+00 (9.4965e-01)\n",
      "Epoch: [11][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8227e-01 (9.4981e-01)\n",
      "Epoch: [11][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0061e+00 (9.4974e-01)\n",
      "Epoch: [11][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8461e-01 (9.4996e-01)\n",
      "Epoch: [11][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0571e+00 (9.5044e-01)\n",
      "Epoch: [11][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0974e+00 (9.5061e-01)\n",
      "Epoch: [11][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7995e-01 (9.5073e-01)\n",
      "Epoch: [11][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2586e-01 (9.5065e-01)\n",
      "Epoch: [11][3450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0405e-01 (9.5076e-01)\n",
      "Epoch: [11][3500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0267e+00 (9.5094e-01)\n",
      "Epoch: [11][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5719e-01 (9.5113e-01)\n",
      "Epoch: [11][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6386e-01 (9.5160e-01)\n",
      "Epoch: [11][3650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1524e+00 (9.5182e-01)\n",
      "Epoch: [11][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4535e-01 (9.5229e-01)\n",
      "Epoch: [11][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2348e-01 (9.5244e-01)\n",
      "Epoch: [11][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3289e-01 (9.5227e-01)\n",
      "Epoch: [11][3850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0213e+00 (9.5237e-01)\n",
      "Epoch: [11][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7584e-01 (9.5229e-01)\n",
      "Epoch: [11][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2792e-01 (9.5273e-01)\n",
      "Epoch: [11][4000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9473e-01 (9.5283e-01)\n",
      "Epoch: [11][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8252e-01 (9.5295e-01)\n",
      "Epoch: [11][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0079e+00 (9.5303e-01)\n",
      "Epoch: [11][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0131e+00 (9.5333e-01)\n",
      "Epoch: [11][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7034e-01 (9.5326e-01)\n",
      "Epoch: [11][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5553e-01 (9.5325e-01)\n",
      "Epoch: [11][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0139e+00 (9.5333e-01)\n",
      "Epoch: [11][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0024e+00 (9.5323e-01)\n",
      "Epoch: [11][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1103e+00 (9.5361e-01)\n",
      "Epoch: [11][4450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.3110e+00 (9.5360e-01)\n",
      "Epoch: [11][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4910e-01 (9.5364e-01)\n",
      "Epoch: [11][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0797e+00 (9.5376e-01)\n",
      "Epoch: [11][4600/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0649e+00 (9.5380e-01)\n",
      "Epoch: [11][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1946e-01 (9.5392e-01)\n",
      "Epoch: [11][4700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0982e+00 (9.5419e-01)\n",
      "Epoch: [11][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1659e+00 (9.5428e-01)\n",
      "Epoch: [11][4800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5105e-01 (9.5459e-01)\n",
      "Epoch: [11][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2239e-01 (9.5478e-01)\n",
      "Epoch: [11][4900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0407e+00 (9.5499e-01)\n",
      "Epoch: [11][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1689e+00 (9.5522e-01)\n",
      "Epoch: [11][5000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9736e-01 (9.5537e-01)\n",
      "Test: [  0/196]\tTime  3.470 ( 3.470)\tLoss 6.1728e-01 (6.1728e-01)\tAcc@1  82.42 ( 82.42)\tAcc@5  96.88 ( 96.88)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.492)\tLoss 5.7769e-01 (8.0266e-01)\tAcc@1  83.98 ( 78.18)\tAcc@5  97.27 ( 94.79)\n",
      "Test: [100/196]\tTime  0.434 ( 0.463)\tLoss 1.2617e+00 (9.3462e-01)\tAcc@1  64.45 ( 75.47)\tAcc@5  90.62 ( 93.24)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.2192e+00 (1.0616e+00)\tAcc@1  73.05 ( 72.96)\tAcc@5  87.50 ( 91.53)\n",
      "epoch 11 0.9554493352312023 71.97599792480469 0.004000000000000002 2344421 0.09999448081815601\n",
      "Epoch: [12][   0/5005]\tTime  3.185 ( 3.185)\tData  2.546 ( 2.546)\tLoss 8.7446e-01 (8.7446e-01)\n",
      "Epoch: [12][  50/5005]\tTime  0.638 ( 0.687)\tData  0.000 ( 0.050)\tLoss 9.2111e-01 (9.4147e-01)\n",
      "Epoch: [12][ 100/5005]\tTime  0.638 ( 0.663)\tData  0.000 ( 0.025)\tLoss 1.1623e+00 (9.4380e-01)\n",
      "Epoch: [12][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.017)\tLoss 9.9588e-01 (9.3279e-01)\n",
      "Epoch: [12][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.013)\tLoss 9.3001e-01 (9.2959e-01)\n",
      "Epoch: [12][ 250/5005]\tTime  0.637 ( 0.648)\tData  0.000 ( 0.010)\tLoss 9.3270e-01 (9.3204e-01)\n",
      "Epoch: [12][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.009)\tLoss 8.9640e-01 (9.2945e-01)\n",
      "Epoch: [12][ 350/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.007)\tLoss 8.7177e-01 (9.3058e-01)\n",
      "Epoch: [12][ 400/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.007)\tLoss 9.1441e-01 (9.3249e-01)\n",
      "Epoch: [12][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 7.6852e-01 (9.3252e-01)\n",
      "Epoch: [12][ 500/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.005)\tLoss 8.2186e-01 (9.3243e-01)\n",
      "Epoch: [12][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.4720e-01 (9.3320e-01)\n",
      "Epoch: [12][ 600/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.004)\tLoss 8.8534e-01 (9.3285e-01)\n",
      "Epoch: [12][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.8803e-01 (9.3332e-01)\n",
      "Epoch: [12][ 700/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.1996e-01 (9.3329e-01)\n",
      "Epoch: [12][ 750/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.0743e-01 (9.3289e-01)\n",
      "Epoch: [12][ 800/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.3362e-01 (9.3172e-01)\n",
      "Epoch: [12][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0683e+00 (9.3305e-01)\n",
      "Epoch: [12][ 900/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0146e+00 (9.3257e-01)\n",
      "Epoch: [12][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.0480e-01 (9.3294e-01)\n",
      "Epoch: [12][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.3477e-01 (9.3247e-01)\n",
      "Epoch: [12][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.0088e-01 (9.3186e-01)\n",
      "Epoch: [12][1100/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.9023e-01 (9.3221e-01)\n",
      "Epoch: [12][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.0566e-01 (9.3308e-01)\n",
      "Epoch: [12][1200/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.5130e-01 (9.3394e-01)\n",
      "Epoch: [12][1250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5894e-01 (9.3419e-01)\n",
      "Epoch: [12][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0965e-01 (9.3449e-01)\n",
      "Epoch: [12][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.6270e-01 (9.3509e-01)\n",
      "Epoch: [12][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1685e+00 (9.3457e-01)\n",
      "Epoch: [12][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4234e-01 (9.3588e-01)\n",
      "Epoch: [12][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1155e+00 (9.3549e-01)\n",
      "Epoch: [12][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0355e-01 (9.3550e-01)\n",
      "Epoch: [12][1600/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0378e+00 (9.3535e-01)\n",
      "Epoch: [12][1650/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0391e-01 (9.3578e-01)\n",
      "Epoch: [12][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6757e-01 (9.3646e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [12][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2011e-01 (9.3653e-01)\n",
      "Epoch: [12][1800/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0082e-01 (9.3648e-01)\n",
      "Epoch: [12][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0446e+00 (9.3650e-01)\n",
      "Epoch: [12][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7961e-01 (9.3660e-01)\n",
      "Epoch: [12][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6763e-01 (9.3682e-01)\n",
      "Epoch: [12][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.4016e-01 (9.3782e-01)\n",
      "Epoch: [12][2050/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.3387e-01 (9.3849e-01)\n",
      "Epoch: [12][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.0942e-01 (9.3843e-01)\n",
      "Epoch: [12][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.3095e-01 (9.3849e-01)\n",
      "Epoch: [12][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0597e+00 (9.3868e-01)\n",
      "Epoch: [12][2250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.5109e-01 (9.3833e-01)\n",
      "Epoch: [12][2300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.8749e-01 (9.3819e-01)\n",
      "Epoch: [12][2350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.8624e-01 (9.3791e-01)\n",
      "Epoch: [12][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0618e-01 (9.3840e-01)\n",
      "Epoch: [12][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7340e-01 (9.3834e-01)\n",
      "Epoch: [12][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1023e-01 (9.3835e-01)\n",
      "Epoch: [12][2550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5479e-01 (9.3874e-01)\n",
      "Epoch: [12][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9206e-01 (9.3876e-01)\n",
      "Epoch: [12][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1969e-01 (9.3899e-01)\n",
      "Epoch: [12][2700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7602e-01 (9.3911e-01)\n",
      "Epoch: [12][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2738e+00 (9.3917e-01)\n",
      "Epoch: [12][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5735e-01 (9.3862e-01)\n",
      "Epoch: [12][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6323e-01 (9.3907e-01)\n",
      "Epoch: [12][2900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8664e-01 (9.3878e-01)\n",
      "Epoch: [12][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9372e-01 (9.3879e-01)\n",
      "Epoch: [12][3000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1544e+00 (9.3932e-01)\n",
      "Epoch: [12][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9234e-01 (9.3940e-01)\n",
      "Epoch: [12][3100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5706e-01 (9.3951e-01)\n",
      "Epoch: [12][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8566e-01 (9.3964e-01)\n",
      "Epoch: [12][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4675e-01 (9.3961e-01)\n",
      "Epoch: [12][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0684e+00 (9.3930e-01)\n",
      "Epoch: [12][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3413e-01 (9.3946e-01)\n",
      "Epoch: [12][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0258e+00 (9.3955e-01)\n",
      "Epoch: [12][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4054e-01 (9.3969e-01)\n",
      "Epoch: [12][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5456e-01 (9.3961e-01)\n",
      "Epoch: [12][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3409e-01 (9.3973e-01)\n",
      "Epoch: [12][3550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3775e-01 (9.3960e-01)\n",
      "Epoch: [12][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6721e-01 (9.3951e-01)\n",
      "Epoch: [12][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2710e-01 (9.3929e-01)\n",
      "Epoch: [12][3700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0313e+00 (9.3953e-01)\n",
      "Epoch: [12][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8956e-01 (9.3984e-01)\n",
      "Epoch: [12][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7284e-01 (9.3971e-01)\n",
      "Epoch: [12][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1356e+00 (9.4018e-01)\n",
      "Epoch: [12][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7649e-01 (9.4014e-01)\n",
      "Epoch: [12][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8159e-01 (9.4029e-01)\n",
      "Epoch: [12][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0427e-01 (9.4043e-01)\n",
      "Epoch: [12][4050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6277e-01 (9.4079e-01)\n",
      "Epoch: [12][4100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6304e-01 (9.4075e-01)\n",
      "Epoch: [12][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6825e-01 (9.4109e-01)\n",
      "Epoch: [12][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1915e-01 (9.4105e-01)\n",
      "Epoch: [12][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8635e-01 (9.4128e-01)\n",
      "Epoch: [12][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0279e+00 (9.4121e-01)\n",
      "Epoch: [12][4350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4559e-01 (9.4139e-01)\n",
      "Epoch: [12][4400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2460e-01 (9.4134e-01)\n",
      "Epoch: [12][4450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4027e-01 (9.4150e-01)\n",
      "Epoch: [12][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0721e+00 (9.4172e-01)\n",
      "Epoch: [12][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8414e-01 (9.4164e-01)\n",
      "Epoch: [12][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9184e-01 (9.4182e-01)\n",
      "Epoch: [12][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0250e+00 (9.4180e-01)\n",
      "Epoch: [12][4700/5005]\tTime  0.635 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7589e-01 (9.4200e-01)\n",
      "Epoch: [12][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0679e+00 (9.4207e-01)\n",
      "Epoch: [12][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6747e-01 (9.4228e-01)\n",
      "Epoch: [12][4850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7414e-01 (9.4206e-01)\n",
      "Epoch: [12][4900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6557e-01 (9.4209e-01)\n",
      "Epoch: [12][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0281e-01 (9.4212e-01)\n",
      "Epoch: [12][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0647e+00 (9.4232e-01)\n",
      "Test: [  0/196]\tTime  3.464 ( 3.464)\tLoss 5.2942e-01 (5.2942e-01)\tAcc@1  84.77 ( 84.77)\tAcc@5  97.66 ( 97.66)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.492)\tLoss 4.9469e-01 (7.8270e-01)\tAcc@1  85.16 ( 78.91)\tAcc@5  96.88 ( 94.89)\n",
      "Test: [100/196]\tTime  0.434 ( 0.463)\tLoss 1.4355e+00 (9.2120e-01)\tAcc@1  58.20 ( 75.80)\tAcc@5  86.33 ( 93.37)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.2658e+00 (1.0383e+00)\tAcc@1  73.05 ( 73.62)\tAcc@5  86.72 ( 91.81)\n",
      "epoch 12 0.94230660993754 72.67599487304688 0.003500000000000001 2344421 0.09999448081815601\n",
      "Epoch: [13][   0/5005]\tTime  3.033 ( 3.033)\tData  2.393 ( 2.393)\tLoss 9.2930e-01 (9.2930e-01)\n",
      "Epoch: [13][  50/5005]\tTime  0.639 ( 0.685)\tData  0.000 ( 0.047)\tLoss 9.2101e-01 (9.3461e-01)\n",
      "Epoch: [13][ 100/5005]\tTime  0.638 ( 0.662)\tData  0.000 ( 0.024)\tLoss 8.5909e-01 (9.3119e-01)\n",
      "Epoch: [13][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.016)\tLoss 9.1847e-01 (9.3353e-01)\n",
      "Epoch: [13][ 200/5005]\tTime  0.638 ( 0.650)\tData  0.000 ( 0.012)\tLoss 9.1674e-01 (9.2407e-01)\n",
      "Epoch: [13][ 250/5005]\tTime  0.638 ( 0.647)\tData  0.000 ( 0.010)\tLoss 8.0010e-01 (9.2086e-01)\n",
      "Epoch: [13][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.008)\tLoss 9.6293e-01 (9.1982e-01)\n",
      "Epoch: [13][ 350/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.007)\tLoss 7.3510e-01 (9.1986e-01)\n",
      "Epoch: [13][ 400/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.006)\tLoss 9.0435e-01 (9.1695e-01)\n",
      "Epoch: [13][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 1.0010e+00 (9.1432e-01)\n",
      "Epoch: [13][ 500/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.1003e+00 (9.1341e-01)\n",
      "Epoch: [13][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.4694e-01 (9.1458e-01)\n",
      "Epoch: [13][ 600/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 9.1440e-01 (9.1399e-01)\n",
      "Epoch: [13][ 650/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.2721e-01 (9.1356e-01)\n",
      "Epoch: [13][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.6789e-01 (9.1402e-01)\n",
      "Epoch: [13][ 750/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.003)\tLoss 1.0789e+00 (9.1416e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [13][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.1054e-01 (9.1474e-01)\n",
      "Epoch: [13][ 850/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1005e+00 (9.1598e-01)\n",
      "Epoch: [13][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.1775e+00 (9.1622e-01)\n",
      "Epoch: [13][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.7051e-01 (9.1723e-01)\n",
      "Epoch: [13][1000/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0169e+00 (9.1677e-01)\n",
      "Epoch: [13][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.3901e-01 (9.1647e-01)\n",
      "Epoch: [13][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.2346e+00 (9.1731e-01)\n",
      "Epoch: [13][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.9462e-01 (9.1666e-01)\n",
      "Epoch: [13][1200/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7796e-01 (9.1538e-01)\n",
      "Epoch: [13][1250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5052e-01 (9.1513e-01)\n",
      "Epoch: [13][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3281e-01 (9.1479e-01)\n",
      "Epoch: [13][1350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0817e-01 (9.1523e-01)\n",
      "Epoch: [13][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3983e-01 (9.1517e-01)\n",
      "Epoch: [13][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.8478e-01 (9.1532e-01)\n",
      "Epoch: [13][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0702e+00 (9.1653e-01)\n",
      "Epoch: [13][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8973e-01 (9.1646e-01)\n",
      "Epoch: [13][1600/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0323e-01 (9.1689e-01)\n",
      "Epoch: [13][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0232e+00 (9.1816e-01)\n",
      "Epoch: [13][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5634e-01 (9.1801e-01)\n",
      "Epoch: [13][1750/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3237e-01 (9.1906e-01)\n",
      "Epoch: [13][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7089e-01 (9.1913e-01)\n",
      "Epoch: [13][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0247e+00 (9.1929e-01)\n",
      "Epoch: [13][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.2101e-01 (9.1949e-01)\n",
      "Epoch: [13][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 6.9598e-01 (9.1957e-01)\n",
      "Epoch: [13][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.1995e-01 (9.2005e-01)\n",
      "Epoch: [13][2050/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.4086e-01 (9.1982e-01)\n",
      "Epoch: [13][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0109e+00 (9.2034e-01)\n",
      "Epoch: [13][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.3839e-01 (9.2078e-01)\n",
      "Epoch: [13][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.4328e-01 (9.2083e-01)\n",
      "Epoch: [13][2250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.3849e-01 (9.2102e-01)\n",
      "Epoch: [13][2300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1711e+00 (9.2146e-01)\n",
      "Epoch: [13][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9896e-01 (9.2171e-01)\n",
      "Epoch: [13][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8591e-01 (9.2168e-01)\n",
      "Epoch: [13][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1538e-01 (9.2164e-01)\n",
      "Epoch: [13][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5213e-01 (9.2241e-01)\n",
      "Epoch: [13][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4565e-01 (9.2258e-01)\n",
      "Epoch: [13][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6100e-01 (9.2235e-01)\n",
      "Epoch: [13][2650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0320e-01 (9.2247e-01)\n",
      "Epoch: [13][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7090e-01 (9.2227e-01)\n",
      "Epoch: [13][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0593e+00 (9.2204e-01)\n",
      "Epoch: [13][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7292e-01 (9.2264e-01)\n",
      "Epoch: [13][2850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6449e-01 (9.2284e-01)\n",
      "Epoch: [13][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1250e-01 (9.2285e-01)\n",
      "Epoch: [13][2950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0042e+00 (9.2279e-01)\n",
      "Epoch: [13][3000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0458e+00 (9.2332e-01)\n",
      "Epoch: [13][3050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2388e-01 (9.2334e-01)\n",
      "Epoch: [13][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0168e+00 (9.2348e-01)\n",
      "Epoch: [13][3150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7138e-01 (9.2382e-01)\n",
      "Epoch: [13][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4311e-01 (9.2413e-01)\n",
      "Epoch: [13][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5004e-01 (9.2441e-01)\n",
      "Epoch: [13][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3864e-01 (9.2443e-01)\n",
      "Epoch: [13][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5084e-01 (9.2476e-01)\n",
      "Epoch: [13][3400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3983e-01 (9.2538e-01)\n",
      "Epoch: [13][3450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5455e-01 (9.2536e-01)\n",
      "Epoch: [13][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0117e+00 (9.2556e-01)\n",
      "Epoch: [13][3550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3592e-01 (9.2515e-01)\n",
      "Epoch: [13][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0273e+00 (9.2560e-01)\n",
      "Epoch: [13][3650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8864e-01 (9.2605e-01)\n",
      "Epoch: [13][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0544e+00 (9.2577e-01)\n",
      "Epoch: [13][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4192e-01 (9.2581e-01)\n",
      "Epoch: [13][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8978e-01 (9.2564e-01)\n",
      "Epoch: [13][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0775e+00 (9.2558e-01)\n",
      "Epoch: [13][3900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0727e+00 (9.2563e-01)\n",
      "Epoch: [13][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6801e-01 (9.2570e-01)\n",
      "Epoch: [13][4000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8826e-01 (9.2577e-01)\n",
      "Epoch: [13][4050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0362e+00 (9.2582e-01)\n",
      "Epoch: [13][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5372e-01 (9.2598e-01)\n",
      "Epoch: [13][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1768e+00 (9.2623e-01)\n",
      "Epoch: [13][4200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8844e-01 (9.2661e-01)\n",
      "Epoch: [13][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0141e+00 (9.2666e-01)\n",
      "Epoch: [13][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5045e-01 (9.2657e-01)\n",
      "Epoch: [13][4350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1614e+00 (9.2678e-01)\n",
      "Epoch: [13][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5884e-01 (9.2681e-01)\n",
      "Epoch: [13][4450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6738e-01 (9.2664e-01)\n",
      "Epoch: [13][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6633e-01 (9.2703e-01)\n",
      "Epoch: [13][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4304e-01 (9.2723e-01)\n",
      "Epoch: [13][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5373e-01 (9.2721e-01)\n",
      "Epoch: [13][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0717e+00 (9.2713e-01)\n",
      "Epoch: [13][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3861e-01 (9.2721e-01)\n",
      "Epoch: [13][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4069e-01 (9.2725e-01)\n",
      "Epoch: [13][4800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9603e-01 (9.2754e-01)\n",
      "Epoch: [13][4850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0361e+00 (9.2784e-01)\n",
      "Epoch: [13][4900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9427e-01 (9.2799e-01)\n",
      "Epoch: [13][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7495e-01 (9.2822e-01)\n",
      "Epoch: [13][5000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0085e+00 (9.2833e-01)\n",
      "Test: [  0/196]\tTime  3.385 ( 3.385)\tLoss 5.8076e-01 (5.8076e-01)\tAcc@1  84.38 ( 84.38)\tAcc@5  96.88 ( 96.88)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.491)\tLoss 5.7072e-01 (8.0052e-01)\tAcc@1  84.38 ( 78.75)\tAcc@5  96.48 ( 94.52)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test: [100/196]\tTime  0.434 ( 0.463)\tLoss 1.5407e+00 (9.3271e-01)\tAcc@1  55.08 ( 75.70)\tAcc@5  86.33 ( 93.09)\n",
      "Test: [150/196]\tTime  0.434 ( 0.453)\tLoss 1.2021e+00 (1.0523e+00)\tAcc@1  74.61 ( 73.48)\tAcc@5  86.72 ( 91.53)\n",
      "epoch 13 0.9283417635067227 72.50799560546875 0.0030000000000000014 2344421 0.09999448081815601\n",
      "Epoch: [14][   0/5005]\tTime  3.075 ( 3.075)\tData  2.437 ( 2.437)\tLoss 1.0509e+00 (1.0509e+00)\n",
      "Epoch: [14][  50/5005]\tTime  0.637 ( 0.685)\tData  0.000 ( 0.048)\tLoss 1.0970e+00 (9.0840e-01)\n",
      "Epoch: [14][ 100/5005]\tTime  0.637 ( 0.662)\tData  0.000 ( 0.024)\tLoss 8.3724e-01 (9.1922e-01)\n",
      "Epoch: [14][ 150/5005]\tTime  0.638 ( 0.654)\tData  0.000 ( 0.016)\tLoss 9.7419e-01 (9.1906e-01)\n",
      "Epoch: [14][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.012)\tLoss 9.1281e-01 (9.1520e-01)\n",
      "Epoch: [14][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 9.9463e-01 (9.1421e-01)\n",
      "Epoch: [14][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.008)\tLoss 7.9613e-01 (9.0868e-01)\n",
      "Epoch: [14][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 7.4990e-01 (9.0580e-01)\n",
      "Epoch: [14][ 400/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.006)\tLoss 1.1062e+00 (9.0413e-01)\n",
      "Epoch: [14][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 7.4120e-01 (9.0421e-01)\n",
      "Epoch: [14][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.4609e-01 (9.0210e-01)\n",
      "Epoch: [14][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.4824e-01 (9.0050e-01)\n",
      "Epoch: [14][ 600/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 8.4894e-01 (9.0130e-01)\n",
      "Epoch: [14][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.7969e-01 (9.0059e-01)\n",
      "Epoch: [14][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.7032e-01 (9.0042e-01)\n",
      "Epoch: [14][ 750/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.003)\tLoss 7.8216e-01 (9.0038e-01)\n",
      "Epoch: [14][ 800/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0409e+00 (9.0001e-01)\n",
      "Epoch: [14][ 850/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0127e+00 (9.0088e-01)\n",
      "Epoch: [14][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.7477e-01 (9.0061e-01)\n",
      "Epoch: [14][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0321e+00 (9.0152e-01)\n",
      "Epoch: [14][1000/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.3686e-01 (9.0330e-01)\n",
      "Epoch: [14][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.7550e-01 (9.0369e-01)\n",
      "Epoch: [14][1100/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.8467e-01 (9.0312e-01)\n",
      "Epoch: [14][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.5190e-01 (9.0273e-01)\n",
      "Epoch: [14][1200/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6025e-01 (9.0296e-01)\n",
      "Epoch: [14][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9239e-01 (9.0254e-01)\n",
      "Epoch: [14][1300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.4819e-01 (9.0259e-01)\n",
      "Epoch: [14][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.8417e-01 (9.0303e-01)\n",
      "Epoch: [14][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0340e-01 (9.0268e-01)\n",
      "Epoch: [14][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.2970e-01 (9.0272e-01)\n",
      "Epoch: [14][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5105e-01 (9.0284e-01)\n",
      "Epoch: [14][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0024e+00 (9.0282e-01)\n",
      "Epoch: [14][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2753e-01 (9.0305e-01)\n",
      "Epoch: [14][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8746e-01 (9.0280e-01)\n",
      "Epoch: [14][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2099e-01 (9.0332e-01)\n",
      "Epoch: [14][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.7029e-01 (9.0349e-01)\n",
      "Epoch: [14][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8451e-01 (9.0362e-01)\n",
      "Epoch: [14][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6599e-01 (9.0467e-01)\n",
      "Epoch: [14][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.3580e-01 (9.0436e-01)\n",
      "Epoch: [14][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.6401e-01 (9.0409e-01)\n",
      "Epoch: [14][2000/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.5251e-01 (9.0435e-01)\n",
      "Epoch: [14][2050/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.0234e-01 (9.0487e-01)\n",
      "Epoch: [14][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.0483e-01 (9.0509e-01)\n",
      "Epoch: [14][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.5353e-01 (9.0442e-01)\n",
      "Epoch: [14][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.0038e-01 (9.0426e-01)\n",
      "Epoch: [14][2250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0601e+00 (9.0416e-01)\n",
      "Epoch: [14][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7052e-01 (9.0439e-01)\n",
      "Epoch: [14][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0168e+00 (9.0458e-01)\n",
      "Epoch: [14][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8845e-01 (9.0513e-01)\n",
      "Epoch: [14][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7317e-01 (9.0512e-01)\n",
      "Epoch: [14][2500/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2665e-01 (9.0521e-01)\n",
      "Epoch: [14][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6291e-01 (9.0500e-01)\n",
      "Epoch: [14][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5345e-01 (9.0524e-01)\n",
      "Epoch: [14][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5642e-01 (9.0538e-01)\n",
      "Epoch: [14][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3452e-01 (9.0530e-01)\n",
      "Epoch: [14][2750/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8523e-01 (9.0526e-01)\n",
      "Epoch: [14][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9889e-01 (9.0566e-01)\n",
      "Epoch: [14][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0409e+00 (9.0586e-01)\n",
      "Epoch: [14][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7445e-01 (9.0608e-01)\n",
      "Epoch: [14][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8439e-01 (9.0627e-01)\n",
      "Epoch: [14][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6539e-01 (9.0609e-01)\n",
      "Epoch: [14][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8126e-01 (9.0595e-01)\n",
      "Epoch: [14][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6872e-01 (9.0611e-01)\n",
      "Epoch: [14][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7568e-01 (9.0613e-01)\n",
      "Epoch: [14][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3068e-01 (9.0628e-01)\n",
      "Epoch: [14][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8297e-01 (9.0663e-01)\n",
      "Epoch: [14][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0178e+00 (9.0695e-01)\n",
      "Epoch: [14][3350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1065e+00 (9.0701e-01)\n",
      "Epoch: [14][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0902e-01 (9.0713e-01)\n",
      "Epoch: [14][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4756e-01 (9.0722e-01)\n",
      "Epoch: [14][3500/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5788e-01 (9.0737e-01)\n",
      "Epoch: [14][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6096e-01 (9.0729e-01)\n",
      "Epoch: [14][3600/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9379e-01 (9.0723e-01)\n",
      "Epoch: [14][3650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4746e-01 (9.0720e-01)\n",
      "Epoch: [14][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3510e-01 (9.0726e-01)\n",
      "Epoch: [14][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1071e+00 (9.0700e-01)\n",
      "Epoch: [14][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8962e-01 (9.0684e-01)\n",
      "Epoch: [14][3850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6205e-01 (9.0697e-01)\n",
      "Epoch: [14][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6535e-01 (9.0729e-01)\n",
      "Epoch: [14][3950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5666e-01 (9.0739e-01)\n",
      "Epoch: [14][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2771e-01 (9.0757e-01)\n",
      "Epoch: [14][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1732e-01 (9.0779e-01)\n",
      "Epoch: [14][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9817e-01 (9.0780e-01)\n",
      "Epoch: [14][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9612e-01 (9.0784e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [14][4200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1278e-01 (9.0783e-01)\n",
      "Epoch: [14][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0947e-01 (9.0786e-01)\n",
      "Epoch: [14][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9866e-01 (9.0804e-01)\n",
      "Epoch: [14][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1319e-01 (9.0810e-01)\n",
      "Epoch: [14][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6298e-01 (9.0832e-01)\n",
      "Epoch: [14][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9371e-01 (9.0847e-01)\n",
      "Epoch: [14][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7244e-01 (9.0863e-01)\n",
      "Epoch: [14][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9189e-01 (9.0879e-01)\n",
      "Epoch: [14][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0660e-01 (9.0877e-01)\n",
      "Epoch: [14][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2774e-01 (9.0868e-01)\n",
      "Epoch: [14][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0650e-01 (9.0908e-01)\n",
      "Epoch: [14][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4186e-01 (9.0934e-01)\n",
      "Epoch: [14][4800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0980e+00 (9.0916e-01)\n",
      "Epoch: [14][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6874e-01 (9.0904e-01)\n",
      "Epoch: [14][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0259e+00 (9.0942e-01)\n",
      "Epoch: [14][4950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2206e-01 (9.0968e-01)\n",
      "Epoch: [14][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2993e-01 (9.0973e-01)\n",
      "Test: [  0/196]\tTime  3.462 ( 3.462)\tLoss 5.9230e-01 (5.9230e-01)\tAcc@1  83.59 ( 83.59)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.492)\tLoss 4.8244e-01 (7.6527e-01)\tAcc@1  87.50 ( 79.38)\tAcc@5  97.27 ( 95.17)\n",
      "Test: [100/196]\tTime  0.434 ( 0.463)\tLoss 1.4146e+00 (8.9055e-01)\tAcc@1  60.94 ( 76.67)\tAcc@5  88.67 ( 93.80)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.2106e+00 (1.0180e+00)\tAcc@1  74.22 ( 74.26)\tAcc@5  87.89 ( 92.13)\n",
      "epoch 14 0.9097616108767113 73.0739974975586 0.002500000000000001 2344421 0.09999448081815601\n",
      "Epoch: [15][   0/5005]\tTime  3.123 ( 3.123)\tData  2.482 ( 2.482)\tLoss 8.2902e-01 (8.2902e-01)\n",
      "Epoch: [15][  50/5005]\tTime  0.638 ( 0.686)\tData  0.000 ( 0.049)\tLoss 9.3516e-01 (8.6539e-01)\n",
      "Epoch: [15][ 100/5005]\tTime  0.638 ( 0.662)\tData  0.000 ( 0.025)\tLoss 8.3248e-01 (8.7036e-01)\n",
      "Epoch: [15][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.017)\tLoss 8.7426e-01 (8.7388e-01)\n",
      "Epoch: [15][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.013)\tLoss 8.3021e-01 (8.7484e-01)\n",
      "Epoch: [15][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 8.0586e-01 (8.7674e-01)\n",
      "Epoch: [15][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.008)\tLoss 8.9332e-01 (8.7679e-01)\n",
      "Epoch: [15][ 350/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.007)\tLoss 7.8768e-01 (8.7618e-01)\n",
      "Epoch: [15][ 400/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.006)\tLoss 8.5062e-01 (8.7606e-01)\n",
      "Epoch: [15][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 9.0461e-01 (8.7985e-01)\n",
      "Epoch: [15][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.6798e-01 (8.7971e-01)\n",
      "Epoch: [15][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.5389e-01 (8.8001e-01)\n",
      "Epoch: [15][ 600/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 8.7007e-01 (8.8051e-01)\n",
      "Epoch: [15][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.7920e-01 (8.8039e-01)\n",
      "Epoch: [15][ 700/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0775e+00 (8.8239e-01)\n",
      "Epoch: [15][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.7420e-01 (8.8394e-01)\n",
      "Epoch: [15][ 800/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 7.7354e-01 (8.8421e-01)\n",
      "Epoch: [15][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.1081e-01 (8.8549e-01)\n",
      "Epoch: [15][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6741e-01 (8.8600e-01)\n",
      "Epoch: [15][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.8594e-01 (8.8674e-01)\n",
      "Epoch: [15][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.8269e-01 (8.8740e-01)\n",
      "Epoch: [15][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.0393e-01 (8.8630e-01)\n",
      "Epoch: [15][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.6149e-01 (8.8678e-01)\n",
      "Epoch: [15][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.1054e-01 (8.8705e-01)\n",
      "Epoch: [15][1200/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.7743e-01 (8.8680e-01)\n",
      "Epoch: [15][1250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0791e+00 (8.8775e-01)\n",
      "Epoch: [15][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1642e-01 (8.8831e-01)\n",
      "Epoch: [15][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0429e+00 (8.8870e-01)\n",
      "Epoch: [15][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2415e-01 (8.8817e-01)\n",
      "Epoch: [15][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9924e-01 (8.8815e-01)\n",
      "Epoch: [15][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.5486e-01 (8.8750e-01)\n",
      "Epoch: [15][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7638e-01 (8.8741e-01)\n",
      "Epoch: [15][1600/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3481e-01 (8.8803e-01)\n",
      "Epoch: [15][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.8508e-01 (8.8818e-01)\n",
      "Epoch: [15][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.0156e-01 (8.8764e-01)\n",
      "Epoch: [15][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2597e-01 (8.8719e-01)\n",
      "Epoch: [15][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.5288e-01 (8.8711e-01)\n",
      "Epoch: [15][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0744e+00 (8.8734e-01)\n",
      "Epoch: [15][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.4343e-01 (8.8797e-01)\n",
      "Epoch: [15][1950/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0418e+00 (8.8839e-01)\n",
      "Epoch: [15][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.6805e-01 (8.8839e-01)\n",
      "Epoch: [15][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.1330e-01 (8.8853e-01)\n",
      "Epoch: [15][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0175e+00 (8.8886e-01)\n",
      "Epoch: [15][2150/5005]\tTime  0.639 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.8747e-01 (8.8912e-01)\n",
      "Epoch: [15][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.9726e-01 (8.8906e-01)\n",
      "Epoch: [15][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8414e-01 (8.8885e-01)\n",
      "Epoch: [15][2300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4925e-01 (8.8881e-01)\n",
      "Epoch: [15][2350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1038e-01 (8.8902e-01)\n",
      "Epoch: [15][2400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0631e+00 (8.8928e-01)\n",
      "Epoch: [15][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5112e-01 (8.8946e-01)\n",
      "Epoch: [15][2500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7787e-01 (8.8922e-01)\n",
      "Epoch: [15][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5442e-01 (8.8919e-01)\n",
      "Epoch: [15][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1977e-01 (8.8936e-01)\n",
      "Epoch: [15][2650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3156e-01 (8.8953e-01)\n",
      "Epoch: [15][2700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6567e-01 (8.8950e-01)\n",
      "Epoch: [15][2750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0570e-01 (8.8968e-01)\n",
      "Epoch: [15][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5102e-01 (8.8950e-01)\n",
      "Epoch: [15][2850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7720e-01 (8.8978e-01)\n",
      "Epoch: [15][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5632e-01 (8.9015e-01)\n",
      "Epoch: [15][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4823e-01 (8.8997e-01)\n",
      "Epoch: [15][3000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6164e-01 (8.9014e-01)\n",
      "Epoch: [15][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0131e-01 (8.9022e-01)\n",
      "Epoch: [15][3100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5048e-01 (8.9014e-01)\n",
      "Epoch: [15][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2428e-01 (8.9034e-01)\n",
      "Epoch: [15][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6490e-01 (8.9049e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [15][3250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4912e-01 (8.9044e-01)\n",
      "Epoch: [15][3300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1736e-01 (8.9033e-01)\n",
      "Epoch: [15][3350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6559e-01 (8.9053e-01)\n",
      "Epoch: [15][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8077e-01 (8.9035e-01)\n",
      "Epoch: [15][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0303e-01 (8.9055e-01)\n",
      "Epoch: [15][3500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8000e-01 (8.9052e-01)\n",
      "Epoch: [15][3550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0179e+00 (8.9017e-01)\n",
      "Epoch: [15][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6879e-01 (8.9032e-01)\n",
      "Epoch: [15][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8553e-01 (8.9050e-01)\n",
      "Epoch: [15][3700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3205e-01 (8.9061e-01)\n",
      "Epoch: [15][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5155e-01 (8.9093e-01)\n",
      "Epoch: [15][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0994e-01 (8.9086e-01)\n",
      "Epoch: [15][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8479e-01 (8.9110e-01)\n",
      "Epoch: [15][3900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0176e+00 (8.9121e-01)\n",
      "Epoch: [15][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1472e-01 (8.9127e-01)\n",
      "Epoch: [15][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0098e+00 (8.9126e-01)\n",
      "Epoch: [15][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5310e-01 (8.9123e-01)\n",
      "Epoch: [15][4100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3456e-01 (8.9144e-01)\n",
      "Epoch: [15][4150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0583e-01 (8.9136e-01)\n",
      "Epoch: [15][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1047e-01 (8.9149e-01)\n",
      "Epoch: [15][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9166e-01 (8.9159e-01)\n",
      "Epoch: [15][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5130e-01 (8.9173e-01)\n",
      "Epoch: [15][4350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0072e+00 (8.9170e-01)\n",
      "Epoch: [15][4400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2087e-01 (8.9166e-01)\n",
      "Epoch: [15][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4297e-01 (8.9173e-01)\n",
      "Epoch: [15][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3522e-01 (8.9187e-01)\n",
      "Epoch: [15][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8684e-01 (8.9188e-01)\n",
      "Epoch: [15][4600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6836e-01 (8.9175e-01)\n",
      "Epoch: [15][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9101e-01 (8.9188e-01)\n",
      "Epoch: [15][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8018e-01 (8.9191e-01)\n",
      "Epoch: [15][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9397e-01 (8.9198e-01)\n",
      "Epoch: [15][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6174e-01 (8.9189e-01)\n",
      "Epoch: [15][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5778e-01 (8.9183e-01)\n",
      "Epoch: [15][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0403e-01 (8.9194e-01)\n",
      "Epoch: [15][4950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6077e-01 (8.9198e-01)\n",
      "Epoch: [15][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7476e-01 (8.9208e-01)\n",
      "Test: [  0/196]\tTime  3.429 ( 3.429)\tLoss 5.4922e-01 (5.4922e-01)\tAcc@1  84.77 ( 84.77)\tAcc@5  96.48 ( 96.48)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.492)\tLoss 4.5947e-01 (7.5729e-01)\tAcc@1  88.28 ( 79.46)\tAcc@5  97.27 ( 95.41)\n",
      "Test: [100/196]\tTime  0.434 ( 0.463)\tLoss 1.3809e+00 (8.8918e-01)\tAcc@1  60.94 ( 76.69)\tAcc@5  87.89 ( 93.85)\n",
      "Test: [150/196]\tTime  0.434 ( 0.453)\tLoss 1.1865e+00 (1.0107e+00)\tAcc@1  75.39 ( 74.35)\tAcc@5  88.67 ( 92.20)\n",
      "epoch 15 0.8920679091557239 73.32599639892578 0.0020000000000000005 2344421 0.09999448081815601\n",
      "Epoch: [16][   0/5005]\tTime  3.108 ( 3.108)\tData  2.470 ( 2.470)\tLoss 8.7656e-01 (8.7656e-01)\n",
      "Epoch: [16][  50/5005]\tTime  0.637 ( 0.686)\tData  0.000 ( 0.049)\tLoss 8.8678e-01 (8.7193e-01)\n",
      "Epoch: [16][ 100/5005]\tTime  0.638 ( 0.662)\tData  0.000 ( 0.025)\tLoss 9.6498e-01 (8.7743e-01)\n",
      "Epoch: [16][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.017)\tLoss 7.9304e-01 (8.7412e-01)\n",
      "Epoch: [16][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.012)\tLoss 8.0304e-01 (8.7668e-01)\n",
      "Epoch: [16][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 8.6903e-01 (8.7282e-01)\n",
      "Epoch: [16][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.008)\tLoss 8.6556e-01 (8.7557e-01)\n",
      "Epoch: [16][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 8.3146e-01 (8.7455e-01)\n",
      "Epoch: [16][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 9.1534e-01 (8.7352e-01)\n",
      "Epoch: [16][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.6811e-01 (8.7261e-01)\n",
      "Epoch: [16][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.7086e-01 (8.7166e-01)\n",
      "Epoch: [16][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.9753e-01 (8.7076e-01)\n",
      "Epoch: [16][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.4992e-01 (8.7186e-01)\n",
      "Epoch: [16][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.1207e-01 (8.7096e-01)\n",
      "Epoch: [16][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.9530e-01 (8.7285e-01)\n",
      "Epoch: [16][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.2708e-01 (8.7227e-01)\n",
      "Epoch: [16][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.0282e-01 (8.7239e-01)\n",
      "Epoch: [16][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.8386e-01 (8.7269e-01)\n",
      "Epoch: [16][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.1731e-01 (8.7291e-01)\n",
      "Epoch: [16][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.1752e-01 (8.7298e-01)\n",
      "Epoch: [16][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.4865e-01 (8.7324e-01)\n",
      "Epoch: [16][1050/5005]\tTime  0.640 ( 0.639)\tData  0.000 ( 0.003)\tLoss 7.8909e-01 (8.7393e-01)\n",
      "Epoch: [16][1100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0809e-01 (8.7387e-01)\n",
      "Epoch: [16][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.1010e-01 (8.7358e-01)\n",
      "Epoch: [16][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3932e-01 (8.7356e-01)\n",
      "Epoch: [16][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0932e-01 (8.7409e-01)\n",
      "Epoch: [16][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.4397e-01 (8.7277e-01)\n",
      "Epoch: [16][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.6863e-01 (8.7273e-01)\n",
      "Epoch: [16][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.5606e-01 (8.7346e-01)\n",
      "Epoch: [16][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.2625e-01 (8.7337e-01)\n",
      "Epoch: [16][1500/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.8300e-01 (8.7400e-01)\n",
      "Epoch: [16][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3499e-01 (8.7392e-01)\n",
      "Epoch: [16][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7348e-01 (8.7452e-01)\n",
      "Epoch: [16][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.8053e-01 (8.7377e-01)\n",
      "Epoch: [16][1700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 8.4086e-01 (8.7370e-01)\n",
      "Epoch: [16][1750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 9.3339e-01 (8.7359e-01)\n",
      "Epoch: [16][1800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.002)\tLoss 8.2781e-01 (8.7378e-01)\n",
      "Epoch: [16][1850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.002)\tLoss 8.3062e-01 (8.7378e-01)\n",
      "Epoch: [16][1900/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3279e-01 (8.7435e-01)\n",
      "Epoch: [16][1950/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2506e-01 (8.7414e-01)\n",
      "Epoch: [16][2000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8136e-01 (8.7442e-01)\n",
      "Epoch: [16][2050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5245e-01 (8.7467e-01)\n",
      "Epoch: [16][2100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4483e-01 (8.7514e-01)\n",
      "Epoch: [16][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3961e-01 (8.7493e-01)\n",
      "Epoch: [16][2200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9339e-01 (8.7530e-01)\n",
      "Epoch: [16][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6578e-01 (8.7477e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [16][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.3662e-01 (8.7477e-01)\n",
      "Epoch: [16][2350/5005]\tTime  0.640 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0962e-01 (8.7454e-01)\n",
      "Epoch: [16][2400/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4015e-01 (8.7467e-01)\n",
      "Epoch: [16][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6714e-01 (8.7480e-01)\n",
      "Epoch: [16][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6649e-01 (8.7462e-01)\n",
      "Epoch: [16][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1610e-01 (8.7466e-01)\n",
      "Epoch: [16][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3651e-01 (8.7457e-01)\n",
      "Epoch: [16][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7776e-01 (8.7462e-01)\n",
      "Epoch: [16][2700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5116e-01 (8.7482e-01)\n",
      "Epoch: [16][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8876e-01 (8.7499e-01)\n",
      "Epoch: [16][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4669e-01 (8.7514e-01)\n",
      "Epoch: [16][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6503e-01 (8.7485e-01)\n",
      "Epoch: [16][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1350e-01 (8.7481e-01)\n",
      "Epoch: [16][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9023e-01 (8.7510e-01)\n",
      "Epoch: [16][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3928e-01 (8.7514e-01)\n",
      "Epoch: [16][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1521e-01 (8.7519e-01)\n",
      "Epoch: [16][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3932e-01 (8.7536e-01)\n",
      "Epoch: [16][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5748e-01 (8.7523e-01)\n",
      "Epoch: [16][3200/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0590e-01 (8.7553e-01)\n",
      "Epoch: [16][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3618e-01 (8.7525e-01)\n",
      "Epoch: [16][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2521e-01 (8.7506e-01)\n",
      "Epoch: [16][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0457e-01 (8.7514e-01)\n",
      "Epoch: [16][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5047e-01 (8.7530e-01)\n",
      "Epoch: [16][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8539e-01 (8.7546e-01)\n",
      "Epoch: [16][3500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5378e-01 (8.7528e-01)\n",
      "Epoch: [16][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2135e-01 (8.7551e-01)\n",
      "Epoch: [16][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9376e-01 (8.7523e-01)\n",
      "Epoch: [16][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3747e-01 (8.7518e-01)\n",
      "Epoch: [16][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1845e-01 (8.7521e-01)\n",
      "Epoch: [16][3750/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5733e-01 (8.7523e-01)\n",
      "Epoch: [16][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0298e-01 (8.7518e-01)\n",
      "Epoch: [16][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0758e-01 (8.7514e-01)\n",
      "Epoch: [16][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1992e+00 (8.7533e-01)\n",
      "Epoch: [16][3950/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6107e-01 (8.7541e-01)\n",
      "Epoch: [16][4000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9306e-01 (8.7570e-01)\n",
      "Epoch: [16][4050/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7616e-01 (8.7569e-01)\n",
      "Epoch: [16][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5763e-01 (8.7605e-01)\n",
      "Epoch: [16][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7287e-01 (8.7600e-01)\n",
      "Epoch: [16][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4650e-01 (8.7598e-01)\n",
      "Epoch: [16][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0353e+00 (8.7589e-01)\n",
      "Epoch: [16][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1860e-01 (8.7591e-01)\n",
      "Epoch: [16][4350/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5069e-01 (8.7610e-01)\n",
      "Epoch: [16][4400/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.0626e-01 (8.7622e-01)\n",
      "Epoch: [16][4450/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.8721e-01 (8.7634e-01)\n",
      "Epoch: [16][4500/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.9244e-01 (8.7632e-01)\n",
      "Epoch: [16][4550/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.4635e-01 (8.7615e-01)\n",
      "Epoch: [16][4600/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.9401e-01 (8.7639e-01)\n",
      "Epoch: [16][4650/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.2742e-01 (8.7647e-01)\n",
      "Epoch: [16][4700/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 1.0616e+00 (8.7669e-01)\n",
      "Epoch: [16][4750/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.1675e-01 (8.7674e-01)\n",
      "Epoch: [16][4800/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.0541e-01 (8.7641e-01)\n",
      "Epoch: [16][4850/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 9.2751e-01 (8.7634e-01)\n",
      "Epoch: [16][4900/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.6513e-01 (8.7667e-01)\n",
      "Epoch: [16][4950/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.3861e-01 (8.7677e-01)\n",
      "Epoch: [16][5000/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.4586e-01 (8.7683e-01)\n",
      "Test: [  0/196]\tTime  3.477 ( 3.477)\tLoss 5.4518e-01 (5.4518e-01)\tAcc@1  85.94 ( 85.94)\tAcc@5  97.66 ( 97.66)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.492)\tLoss 4.0025e-01 (7.5239e-01)\tAcc@1  89.45 ( 79.85)\tAcc@5  97.66 ( 95.14)\n",
      "Test: [100/196]\tTime  0.433 ( 0.463)\tLoss 1.3192e+00 (8.8384e-01)\tAcc@1  64.45 ( 76.82)\tAcc@5  88.67 ( 93.84)\n",
      "Test: [150/196]\tTime  0.434 ( 0.453)\tLoss 1.1817e+00 (1.0034e+00)\tAcc@1  74.61 ( 74.59)\tAcc@5  88.28 ( 92.30)\n",
      "epoch 16 0.8768161085941701 73.6240005493164 0.0015000000000000011 2344421 0.09999448081815601\n",
      "Epoch: [17][   0/5005]\tTime  3.079 ( 3.079)\tData  2.437 ( 2.437)\tLoss 8.8207e-01 (8.8207e-01)\n",
      "Epoch: [17][  50/5005]\tTime  0.638 ( 0.685)\tData  0.000 ( 0.048)\tLoss 8.3628e-01 (8.3909e-01)\n",
      "Epoch: [17][ 100/5005]\tTime  0.637 ( 0.661)\tData  0.000 ( 0.024)\tLoss 9.0245e-01 (8.5100e-01)\n",
      "Epoch: [17][ 150/5005]\tTime  0.637 ( 0.653)\tData  0.000 ( 0.016)\tLoss 8.8264e-01 (8.5291e-01)\n",
      "Epoch: [17][ 200/5005]\tTime  0.637 ( 0.649)\tData  0.000 ( 0.012)\tLoss 6.5610e-01 (8.4693e-01)\n",
      "Epoch: [17][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 8.1187e-01 (8.4677e-01)\n",
      "Epoch: [17][ 300/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 8.2884e-01 (8.4920e-01)\n",
      "Epoch: [17][ 350/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.007)\tLoss 7.8254e-01 (8.4760e-01)\n",
      "Epoch: [17][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.7323e-01 (8.4976e-01)\n",
      "Epoch: [17][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 7.9023e-01 (8.5007e-01)\n",
      "Epoch: [17][ 500/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.9757e-01 (8.5055e-01)\n",
      "Epoch: [17][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.0594e+00 (8.5237e-01)\n",
      "Epoch: [17][ 600/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.9135e-01 (8.5321e-01)\n",
      "Epoch: [17][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.3175e-01 (8.5264e-01)\n",
      "Epoch: [17][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.9103e-01 (8.5302e-01)\n",
      "Epoch: [17][ 750/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.003)\tLoss 7.4623e-01 (8.5297e-01)\n",
      "Epoch: [17][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.4304e-01 (8.5320e-01)\n",
      "Epoch: [17][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6516e-01 (8.5311e-01)\n",
      "Epoch: [17][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.1794e-01 (8.5260e-01)\n",
      "Epoch: [17][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.2067e-01 (8.5295e-01)\n",
      "Epoch: [17][1000/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.4442e-01 (8.5329e-01)\n",
      "Epoch: [17][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.6677e-01 (8.5396e-01)\n",
      "Epoch: [17][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.6309e-01 (8.5303e-01)\n",
      "Epoch: [17][1150/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.4661e-01 (8.5358e-01)\n",
      "Epoch: [17][1200/5005]\tTime  0.643 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0041e+00 (8.5374e-01)\n",
      "Epoch: [17][1250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.1663e-01 (8.5383e-01)\n",
      "Epoch: [17][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.5410e-01 (8.5309e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [17][1350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.5311e-01 (8.5380e-01)\n",
      "Epoch: [17][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.2505e-01 (8.5347e-01)\n",
      "Epoch: [17][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0014e+00 (8.5371e-01)\n",
      "Epoch: [17][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6523e-01 (8.5356e-01)\n",
      "Epoch: [17][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9259e-01 (8.5327e-01)\n",
      "Epoch: [17][1600/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8650e-01 (8.5331e-01)\n",
      "Epoch: [17][1650/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6770e-01 (8.5368e-01)\n",
      "Epoch: [17][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.9469e-01 (8.5380e-01)\n",
      "Epoch: [17][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2020e-01 (8.5363e-01)\n",
      "Epoch: [17][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.4787e-01 (8.5405e-01)\n",
      "Epoch: [17][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0680e-01 (8.5424e-01)\n",
      "Epoch: [17][1900/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.0967e-01 (8.5392e-01)\n",
      "Epoch: [17][1950/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5903e-01 (8.5401e-01)\n",
      "Epoch: [17][2000/5005]\tTime  0.635 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9443e-01 (8.5429e-01)\n",
      "Epoch: [17][2050/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5003e-01 (8.5504e-01)\n",
      "Epoch: [17][2100/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0325e+00 (8.5540e-01)\n",
      "Epoch: [17][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4611e-01 (8.5521e-01)\n",
      "Epoch: [17][2200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2009e-01 (8.5548e-01)\n",
      "Epoch: [17][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0361e+00 (8.5494e-01)\n",
      "Epoch: [17][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9559e-01 (8.5461e-01)\n",
      "Epoch: [17][2350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4887e-01 (8.5431e-01)\n",
      "Epoch: [17][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7972e-01 (8.5436e-01)\n",
      "Epoch: [17][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8375e-01 (8.5452e-01)\n",
      "Epoch: [17][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3301e-01 (8.5445e-01)\n",
      "Epoch: [17][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3292e-01 (8.5437e-01)\n",
      "Epoch: [17][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0694e-01 (8.5462e-01)\n",
      "Epoch: [17][2650/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9795e-01 (8.5470e-01)\n",
      "Epoch: [17][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9136e-01 (8.5480e-01)\n",
      "Epoch: [17][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5437e-01 (8.5503e-01)\n",
      "Epoch: [17][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2228e-01 (8.5489e-01)\n",
      "Epoch: [17][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2503e-01 (8.5503e-01)\n",
      "Epoch: [17][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1261e-01 (8.5497e-01)\n",
      "Epoch: [17][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4532e-01 (8.5470e-01)\n",
      "Epoch: [17][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7469e-01 (8.5488e-01)\n",
      "Epoch: [17][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9262e-01 (8.5498e-01)\n",
      "Epoch: [17][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4140e-01 (8.5545e-01)\n",
      "Epoch: [17][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0408e+00 (8.5568e-01)\n",
      "Epoch: [17][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4434e-01 (8.5592e-01)\n",
      "Epoch: [17][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4424e-01 (8.5582e-01)\n",
      "Epoch: [17][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1341e-01 (8.5570e-01)\n",
      "Epoch: [17][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2682e-01 (8.5555e-01)\n",
      "Epoch: [17][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4023e-01 (8.5556e-01)\n",
      "Epoch: [17][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1184e-01 (8.5590e-01)\n",
      "Epoch: [17][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9396e-01 (8.5595e-01)\n",
      "Epoch: [17][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2836e-01 (8.5615e-01)\n",
      "Epoch: [17][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2207e-01 (8.5609e-01)\n",
      "Epoch: [17][3650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3238e-01 (8.5640e-01)\n",
      "Epoch: [17][3700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0498e+00 (8.5666e-01)\n",
      "Epoch: [17][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4374e-01 (8.5704e-01)\n",
      "Epoch: [17][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3750e-01 (8.5678e-01)\n",
      "Epoch: [17][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9731e-01 (8.5675e-01)\n",
      "Epoch: [17][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8249e-01 (8.5681e-01)\n",
      "Epoch: [17][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1721e-01 (8.5709e-01)\n",
      "Epoch: [17][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2398e-01 (8.5732e-01)\n",
      "Epoch: [17][4050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3375e-01 (8.5761e-01)\n",
      "Epoch: [17][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8147e-01 (8.5752e-01)\n",
      "Epoch: [17][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0280e-01 (8.5756e-01)\n",
      "Epoch: [17][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6691e-01 (8.5756e-01)\n",
      "Epoch: [17][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4067e-01 (8.5773e-01)\n",
      "Epoch: [17][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1686e-01 (8.5757e-01)\n",
      "Epoch: [17][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2796e-01 (8.5736e-01)\n",
      "Epoch: [17][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2637e-01 (8.5720e-01)\n",
      "Epoch: [17][4450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1661e-01 (8.5720e-01)\n",
      "Epoch: [17][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0464e+00 (8.5738e-01)\n",
      "Epoch: [17][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1156e-01 (8.5737e-01)\n",
      "Epoch: [17][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4908e-01 (8.5739e-01)\n",
      "Epoch: [17][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2424e-01 (8.5721e-01)\n",
      "Epoch: [17][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0598e+00 (8.5725e-01)\n",
      "Epoch: [17][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0794e-01 (8.5731e-01)\n",
      "Epoch: [17][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7339e-01 (8.5737e-01)\n",
      "Epoch: [17][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4059e-01 (8.5750e-01)\n",
      "Epoch: [17][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0336e-01 (8.5743e-01)\n",
      "Epoch: [17][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8152e-01 (8.5759e-01)\n",
      "Epoch: [17][5000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7380e-01 (8.5735e-01)\n",
      "Test: [  0/196]\tTime  3.571 ( 3.571)\tLoss 5.5847e-01 (5.5847e-01)\tAcc@1  83.98 ( 83.98)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.494)\tLoss 4.3009e-01 (7.4129e-01)\tAcc@1  89.06 ( 79.89)\tAcc@5  97.27 ( 95.43)\n",
      "Test: [100/196]\tTime  0.433 ( 0.464)\tLoss 1.3460e+00 (8.6955e-01)\tAcc@1  62.89 ( 77.06)\tAcc@5  88.67 ( 94.02)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.1673e+00 (9.9128e-01)\tAcc@1  76.17 ( 74.77)\tAcc@5  87.89 ( 92.45)\n",
      "epoch 17 0.8573152099222754 73.80999755859375 0.0010000000000000005 2344421 0.09999448081815601\n",
      "Epoch: [18][   0/5005]\tTime  3.054 ( 3.054)\tData  2.416 ( 2.416)\tLoss 1.0308e+00 (1.0308e+00)\n",
      "Epoch: [18][  50/5005]\tTime  0.637 ( 0.685)\tData  0.000 ( 0.048)\tLoss 7.3201e-01 (8.6129e-01)\n",
      "Epoch: [18][ 100/5005]\tTime  0.637 ( 0.661)\tData  0.000 ( 0.024)\tLoss 8.2147e-01 (8.5960e-01)\n",
      "Epoch: [18][ 150/5005]\tTime  0.637 ( 0.653)\tData  0.000 ( 0.016)\tLoss 9.2045e-01 (8.5436e-01)\n",
      "Epoch: [18][ 200/5005]\tTime  0.637 ( 0.649)\tData  0.000 ( 0.012)\tLoss 8.6775e-01 (8.5016e-01)\n",
      "Epoch: [18][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 7.2003e-01 (8.4456e-01)\n",
      "Epoch: [18][ 300/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 8.1704e-01 (8.4434e-01)\n",
      "Epoch: [18][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 7.5976e-01 (8.4297e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [18][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.3117e-01 (8.4349e-01)\n",
      "Epoch: [18][ 450/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.006)\tLoss 8.3534e-01 (8.4465e-01)\n",
      "Epoch: [18][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.3971e-01 (8.4486e-01)\n",
      "Epoch: [18][ 550/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.005)\tLoss 7.7022e-01 (8.4376e-01)\n",
      "Epoch: [18][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.0348e-01 (8.4166e-01)\n",
      "Epoch: [18][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.4656e-01 (8.4266e-01)\n",
      "Epoch: [18][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.5042e-01 (8.4172e-01)\n",
      "Epoch: [18][ 750/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.9788e-01 (8.4165e-01)\n",
      "Epoch: [18][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.8304e-01 (8.4169e-01)\n",
      "Epoch: [18][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.5761e-01 (8.4075e-01)\n",
      "Epoch: [18][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.3061e-01 (8.3920e-01)\n",
      "Epoch: [18][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.4573e-01 (8.4030e-01)\n",
      "Epoch: [18][1000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.003)\tLoss 7.4434e-01 (8.3884e-01)\n",
      "Epoch: [18][1050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.003)\tLoss 8.4999e-01 (8.3829e-01)\n",
      "Epoch: [18][1100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.4430e-01 (8.3747e-01)\n",
      "Epoch: [18][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.7426e-01 (8.3718e-01)\n",
      "Epoch: [18][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.3940e-01 (8.3734e-01)\n",
      "Epoch: [18][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.8830e-01 (8.3750e-01)\n",
      "Epoch: [18][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.7616e-01 (8.3783e-01)\n",
      "Epoch: [18][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.2769e-01 (8.3688e-01)\n",
      "Epoch: [18][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.4995e-01 (8.3711e-01)\n",
      "Epoch: [18][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0690e-01 (8.3709e-01)\n",
      "Epoch: [18][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3516e-01 (8.3694e-01)\n",
      "Epoch: [18][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5833e-01 (8.3656e-01)\n",
      "Epoch: [18][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.8046e-01 (8.3622e-01)\n",
      "Epoch: [18][1650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 8.0793e-01 (8.3690e-01)\n",
      "Epoch: [18][1700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 9.3145e-01 (8.3730e-01)\n",
      "Epoch: [18][1750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 1.0030e+00 (8.3689e-01)\n",
      "Epoch: [18][1800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 9.0017e-01 (8.3645e-01)\n",
      "Epoch: [18][1850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 8.0084e-01 (8.3644e-01)\n",
      "Epoch: [18][1900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4974e-01 (8.3702e-01)\n",
      "Epoch: [18][1950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0565e-01 (8.3697e-01)\n",
      "Epoch: [18][2000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1549e-01 (8.3641e-01)\n",
      "Epoch: [18][2050/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8509e-01 (8.3653e-01)\n",
      "Epoch: [18][2100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9398e-01 (8.3677e-01)\n",
      "Epoch: [18][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9334e-01 (8.3636e-01)\n",
      "Epoch: [18][2200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1550e-01 (8.3704e-01)\n",
      "Epoch: [18][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9867e-01 (8.3720e-01)\n",
      "Epoch: [18][2300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3324e-01 (8.3717e-01)\n",
      "Epoch: [18][2350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0956e-01 (8.3753e-01)\n",
      "Epoch: [18][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3338e-01 (8.3764e-01)\n",
      "Epoch: [18][2450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0831e+00 (8.3772e-01)\n",
      "Epoch: [18][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8274e-01 (8.3814e-01)\n",
      "Epoch: [18][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.6299e-01 (8.3776e-01)\n",
      "Epoch: [18][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1623e-01 (8.3774e-01)\n",
      "Epoch: [18][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9552e-01 (8.3736e-01)\n",
      "Epoch: [18][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9605e-01 (8.3751e-01)\n",
      "Epoch: [18][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9187e-01 (8.3749e-01)\n",
      "Epoch: [18][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4181e-01 (8.3754e-01)\n",
      "Epoch: [18][2850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5928e-01 (8.3733e-01)\n",
      "Epoch: [18][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4698e-01 (8.3742e-01)\n",
      "Epoch: [18][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0982e-01 (8.3737e-01)\n",
      "Epoch: [18][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8365e-01 (8.3750e-01)\n",
      "Epoch: [18][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4165e-01 (8.3744e-01)\n",
      "Epoch: [18][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5006e-01 (8.3719e-01)\n",
      "Epoch: [18][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1512e-01 (8.3745e-01)\n",
      "Epoch: [18][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1460e-01 (8.3756e-01)\n",
      "Epoch: [18][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0182e-01 (8.3743e-01)\n",
      "Epoch: [18][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5289e-01 (8.3699e-01)\n",
      "Epoch: [18][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5976e-01 (8.3718e-01)\n",
      "Epoch: [18][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9096e-01 (8.3683e-01)\n",
      "Epoch: [18][3450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9037e-01 (8.3684e-01)\n",
      "Epoch: [18][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1883e-01 (8.3710e-01)\n",
      "Epoch: [18][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6413e-01 (8.3702e-01)\n",
      "Epoch: [18][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9642e-01 (8.3676e-01)\n",
      "Epoch: [18][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0746e+00 (8.3689e-01)\n",
      "Epoch: [18][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9156e-01 (8.3692e-01)\n",
      "Epoch: [18][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0572e-01 (8.3715e-01)\n",
      "Epoch: [18][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2294e-01 (8.3705e-01)\n",
      "Epoch: [18][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1805e-01 (8.3706e-01)\n",
      "Epoch: [18][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7238e-01 (8.3712e-01)\n",
      "Epoch: [18][3950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0024e-01 (8.3719e-01)\n",
      "Epoch: [18][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3957e-01 (8.3729e-01)\n",
      "Epoch: [18][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8113e-01 (8.3757e-01)\n",
      "Epoch: [18][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3317e-01 (8.3751e-01)\n",
      "Epoch: [18][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0199e-01 (8.3762e-01)\n",
      "Epoch: [18][4200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5720e-01 (8.3780e-01)\n",
      "Epoch: [18][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9523e-01 (8.3799e-01)\n",
      "Epoch: [18][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3869e-01 (8.3804e-01)\n",
      "Epoch: [18][4350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3965e-01 (8.3808e-01)\n",
      "Epoch: [18][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5741e-01 (8.3823e-01)\n",
      "Epoch: [18][4450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3960e-01 (8.3805e-01)\n",
      "Epoch: [18][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6955e-01 (8.3832e-01)\n",
      "Epoch: [18][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8784e-01 (8.3840e-01)\n",
      "Epoch: [18][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0311e-01 (8.3854e-01)\n",
      "Epoch: [18][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3077e-01 (8.3842e-01)\n",
      "Epoch: [18][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0653e-01 (8.3844e-01)\n",
      "Epoch: [18][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5336e-01 (8.3839e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [18][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9921e-01 (8.3841e-01)\n",
      "Epoch: [18][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5931e-01 (8.3861e-01)\n",
      "Epoch: [18][4900/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8259e-01 (8.3879e-01)\n",
      "Epoch: [18][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6768e-01 (8.3875e-01)\n",
      "Epoch: [18][5000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8608e-01 (8.3879e-01)\n",
      "Test: [  0/196]\tTime  3.474 ( 3.474)\tLoss 5.5536e-01 (5.5536e-01)\tAcc@1  85.94 ( 85.94)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.493)\tLoss 4.3713e-01 (7.3204e-01)\tAcc@1  89.45 ( 80.53)\tAcc@5  97.27 ( 95.53)\n",
      "Test: [100/196]\tTime  0.433 ( 0.463)\tLoss 1.3129e+00 (8.6529e-01)\tAcc@1  64.84 ( 77.37)\tAcc@5  89.06 ( 94.09)\n",
      "Test: [150/196]\tTime  0.434 ( 0.453)\tLoss 1.1744e+00 (9.8137e-01)\tAcc@1  74.61 ( 75.12)\tAcc@5  89.06 ( 92.54)\n",
      "epoch 18 0.8388007092393749 74.13199615478516 0.0005000000000000008 2344421 0.09999448081815601\n",
      "Epoch: [19][   0/5005]\tTime  3.102 ( 3.102)\tData  2.462 ( 2.462)\tLoss 8.6009e-01 (8.6009e-01)\n",
      "Epoch: [19][  50/5005]\tTime  0.637 ( 0.686)\tData  0.000 ( 0.048)\tLoss 8.4726e-01 (8.1919e-01)\n",
      "Epoch: [19][ 100/5005]\tTime  0.638 ( 0.662)\tData  0.000 ( 0.025)\tLoss 8.7724e-01 (8.3227e-01)\n",
      "Epoch: [19][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.017)\tLoss 6.6212e-01 (8.1756e-01)\n",
      "Epoch: [19][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.012)\tLoss 8.6278e-01 (8.2119e-01)\n",
      "Epoch: [19][ 250/5005]\tTime  0.638 ( 0.647)\tData  0.000 ( 0.010)\tLoss 7.2537e-01 (8.2538e-01)\n",
      "Epoch: [19][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.008)\tLoss 8.1799e-01 (8.2266e-01)\n",
      "Epoch: [19][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 5.9580e-01 (8.2131e-01)\n",
      "Epoch: [19][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.9356e-01 (8.2232e-01)\n",
      "Epoch: [19][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 7.9637e-01 (8.2550e-01)\n",
      "Epoch: [19][ 500/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.6690e-01 (8.2571e-01)\n",
      "Epoch: [19][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.2951e-01 (8.2442e-01)\n",
      "Epoch: [19][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.9001e-01 (8.2373e-01)\n",
      "Epoch: [19][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 6.8066e-01 (8.2294e-01)\n",
      "Epoch: [19][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.2845e-01 (8.2375e-01)\n",
      "Epoch: [19][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.6018e-01 (8.2328e-01)\n",
      "Epoch: [19][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.2754e-01 (8.2395e-01)\n",
      "Epoch: [19][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.1320e-01 (8.2425e-01)\n",
      "Epoch: [19][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.9807e-01 (8.2373e-01)\n",
      "Epoch: [19][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.3361e-01 (8.2369e-01)\n",
      "Epoch: [19][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6497e-01 (8.2385e-01)\n",
      "Epoch: [19][1050/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.9840e-01 (8.2372e-01)\n",
      "Epoch: [19][1100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6750e-01 (8.2449e-01)\n",
      "Epoch: [19][1150/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8211e-01 (8.2377e-01)\n",
      "Epoch: [19][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8888e-01 (8.2337e-01)\n",
      "Epoch: [19][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9174e-01 (8.2388e-01)\n",
      "Epoch: [19][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.0550e-01 (8.2327e-01)\n",
      "Epoch: [19][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3135e-01 (8.2336e-01)\n",
      "Epoch: [19][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6425e-01 (8.2314e-01)\n",
      "Epoch: [19][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6983e-01 (8.2268e-01)\n",
      "Epoch: [19][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8299e-01 (8.2211e-01)\n",
      "Epoch: [19][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.0990e-01 (8.2139e-01)\n",
      "Epoch: [19][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5725e-01 (8.2146e-01)\n",
      "Epoch: [19][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.3970e-01 (8.2225e-01)\n",
      "Epoch: [19][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2189e-01 (8.2198e-01)\n",
      "Epoch: [19][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0189e+00 (8.2254e-01)\n",
      "Epoch: [19][1800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.4876e-01 (8.2190e-01)\n",
      "Epoch: [19][1850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 8.5673e-01 (8.2234e-01)\n",
      "Epoch: [19][1900/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6155e-01 (8.2256e-01)\n",
      "Epoch: [19][1950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7317e-01 (8.2250e-01)\n",
      "Epoch: [19][2000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2396e-01 (8.2255e-01)\n",
      "Epoch: [19][2050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0664e-01 (8.2274e-01)\n",
      "Epoch: [19][2100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0894e-01 (8.2266e-01)\n",
      "Epoch: [19][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2533e-01 (8.2247e-01)\n",
      "Epoch: [19][2200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3598e-01 (8.2276e-01)\n",
      "Epoch: [19][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2447e-01 (8.2304e-01)\n",
      "Epoch: [19][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0329e+00 (8.2291e-01)\n",
      "Epoch: [19][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9893e-01 (8.2332e-01)\n",
      "Epoch: [19][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2694e-01 (8.2356e-01)\n",
      "Epoch: [19][2450/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2780e-01 (8.2347e-01)\n",
      "Epoch: [19][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0841e-01 (8.2354e-01)\n",
      "Epoch: [19][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5845e-01 (8.2395e-01)\n",
      "Epoch: [19][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8519e-01 (8.2398e-01)\n",
      "Epoch: [19][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7345e-01 (8.2389e-01)\n",
      "Epoch: [19][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5277e-01 (8.2424e-01)\n",
      "Epoch: [19][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2893e-01 (8.2390e-01)\n",
      "Epoch: [19][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.4429e-01 (8.2384e-01)\n",
      "Epoch: [19][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1399e-01 (8.2382e-01)\n",
      "Epoch: [19][2900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2558e-01 (8.2357e-01)\n",
      "Epoch: [19][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4318e-01 (8.2347e-01)\n",
      "Epoch: [19][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6445e-01 (8.2355e-01)\n",
      "Epoch: [19][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1503e-01 (8.2368e-01)\n",
      "Epoch: [19][3100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0226e-01 (8.2355e-01)\n",
      "Epoch: [19][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7305e-01 (8.2366e-01)\n",
      "Epoch: [19][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8785e-01 (8.2409e-01)\n",
      "Epoch: [19][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0861e-01 (8.2383e-01)\n",
      "Epoch: [19][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5476e-01 (8.2380e-01)\n",
      "Epoch: [19][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4905e-01 (8.2395e-01)\n",
      "Epoch: [19][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7295e-01 (8.2442e-01)\n",
      "Epoch: [19][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7653e-01 (8.2458e-01)\n",
      "Epoch: [19][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1901e-01 (8.2456e-01)\n",
      "Epoch: [19][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8994e-01 (8.2490e-01)\n",
      "Epoch: [19][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4187e-01 (8.2524e-01)\n",
      "Epoch: [19][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4019e-01 (8.2484e-01)\n",
      "Epoch: [19][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4126e-01 (8.2500e-01)\n",
      "Epoch: [19][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8071e-01 (8.2500e-01)\n",
      "Epoch: [19][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8249e-01 (8.2498e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [19][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3550e-01 (8.2505e-01)\n",
      "Epoch: [19][3900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1459e-01 (8.2477e-01)\n",
      "Epoch: [19][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9117e-01 (8.2466e-01)\n",
      "Epoch: [19][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6564e-01 (8.2443e-01)\n",
      "Epoch: [19][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9756e-01 (8.2444e-01)\n",
      "Epoch: [19][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1427e-01 (8.2448e-01)\n",
      "Epoch: [19][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6116e-01 (8.2450e-01)\n",
      "Epoch: [19][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1431e-01 (8.2458e-01)\n",
      "Epoch: [19][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0857e-01 (8.2427e-01)\n",
      "Epoch: [19][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7758e-01 (8.2438e-01)\n",
      "Epoch: [19][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3618e-01 (8.2436e-01)\n",
      "Epoch: [19][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3914e-01 (8.2431e-01)\n",
      "Epoch: [19][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8166e-01 (8.2392e-01)\n",
      "Epoch: [19][4500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0926e-01 (8.2395e-01)\n",
      "Epoch: [19][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5451e-01 (8.2386e-01)\n",
      "Epoch: [19][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4226e-01 (8.2430e-01)\n",
      "Epoch: [19][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5244e-01 (8.2460e-01)\n",
      "Epoch: [19][4700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5567e-01 (8.2448e-01)\n",
      "Epoch: [19][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3387e-01 (8.2448e-01)\n",
      "Epoch: [19][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5593e-01 (8.2427e-01)\n",
      "Epoch: [19][4850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8599e-01 (8.2431e-01)\n",
      "Epoch: [19][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3691e-01 (8.2382e-01)\n",
      "Epoch: [19][4950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6448e-01 (8.2375e-01)\n",
      "Epoch: [19][5000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2862e-01 (8.2351e-01)\n",
      "Test: [  0/196]\tTime  3.548 ( 3.548)\tLoss 5.4734e-01 (5.4734e-01)\tAcc@1  83.98 ( 83.98)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.494)\tLoss 4.4774e-01 (7.2408e-01)\tAcc@1  87.89 ( 80.81)\tAcc@5  96.88 ( 95.60)\n",
      "Test: [100/196]\tTime  0.433 ( 0.464)\tLoss 1.3348e+00 (8.5279e-01)\tAcc@1  63.28 ( 77.80)\tAcc@5  89.06 ( 94.20)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.1436e+00 (9.6839e-01)\tAcc@1  75.00 ( 75.53)\tAcc@5  89.84 ( 92.74)\n",
      "epoch 19 0.8235232569022133 74.50599670410156 0.0 2344421 0.09999448081815601\n",
      "acc 74.50599670410156\n"
     ]
    }
   ],
   "source": [
    "def _count_active_conv_weights(model):\n",
    "    \"\"\"Count the non-zero weights of the tracked Conv2d layers in `model`.\n",
    "\n",
    "    A module is tracked when it is exactly `nn.Conv2d` (subclasses are\n",
    "    skipped), its weight has more than 3 entries along dim 1, and its\n",
    "    qualified name contains none of the tags listed below.\n",
    "    \"\"\"\n",
    "    # NOTE(review): these tags presumably mark auxiliary branches inserted by\n",
    "    # run_dsp -- confirm against its implementation.\n",
    "    skipped_tags = (\"conv2b\", \"conv1b\", \"sb\", \"conv3b\")\n",
    "    active = 0\n",
    "    for name, module in model.named_modules():\n",
    "        if type(module) is not nn.Conv2d or module.weight.shape[1] <= 3:\n",
    "            continue\n",
    "        if any(tag in name for tag in skipped_tags):\n",
    "            continue\n",
    "        active += (module.weight != 0).sum().item()\n",
    "    return active\n",
    "\n",
    "\n",
    "def get_res(epochs=20):\n",
    "    \"\"\"Apply `run_dsp` to a pretrained ResNet-50 and fine-tune the result.\n",
    "\n",
    "    Steps: load ImageNet weights, record the dense conv weight count, transform\n",
    "    the model with `run_dsp`, run one zero-learning-rate pass over the training\n",
    "    set, then fine-tune with Nesterov SGD under a linearly decaying LR.\n",
    "    Relies on the notebook-level `train_loader`, `val_loader`, `train`,\n",
    "    `validate` and `run_dsp`, and requires a CUDA device.\n",
    "\n",
    "    Args:\n",
    "        epochs: number of fine-tuning epochs; also the horizon over which the\n",
    "            learning rate decays from 0.01 to 0.\n",
    "\n",
    "    Returns:\n",
    "        A tuple `(acc1, state_dict)`: the value returned by `validate` after the\n",
    "        last epoch (or after the zero-LR pass when `epochs` is 0) and a deep\n",
    "        copy of the final model `state_dict`.\n",
    "    \"\"\"\n",
    "    from torchvision.models import ResNet50_Weights, resnet50\n",
    "\n",
    "    # IMAGENET1K_V1 is the checkpoint the deprecated `pretrained=True` flag loaded.\n",
    "    model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)\n",
    "    model.cuda()\n",
    "\n",
    "    # Dense reference size, measured before run_dsp modifies the model. Convs\n",
    "    # whose weight has <= 3 entries along dim 1 (the RGB stem) are left out.\n",
    "    total_params = sum(\n",
    "        m.weight.numel()\n",
    "        for _, m in model.named_modules()\n",
    "        if type(m) is nn.Conv2d and m.weight.shape[1] > 3\n",
    "    )\n",
    "    print(\"tot\", total_params)\n",
    "\n",
    "    model = run_dsp(model)\n",
    "\n",
    "    # opt0 has lr=0.0, so stepping it leaves every parameter unchanged.\n",
    "    opt0 = torch.optim.SGD(model.parameters(), 0.0, momentum=0.9, nesterov=True, weight_decay=1e-4)\n",
    "    optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum=0.9, nesterov=True, weight_decay=1e-4)\n",
    "    scheduler = torch.optim.lr_scheduler.PolynomialLR(optimizer, total_iters=epochs, power=1)\n",
    "    criterion = nn.CrossEntropyLoss()\n",
    "    criterion_val = nn.CrossEntropyLoss()\n",
    "    scaler = torch.cuda.amp.GradScaler(enabled=True)\n",
    "\n",
    "    print(model, file=sys.stderr)\n",
    "\n",
    "    acc1 = validate(val_loader, model, criterion_val).item()\n",
    "    print(\"start acc no bn\", acc1)\n",
    "    # One pass with the zero-LR optimizer (epoch index -1).\n",
    "    # NOTE(review): presumably this only refreshes BatchNorm running statistics;\n",
    "    # confirm that train() runs the model in training mode.\n",
    "    train(train_loader, model, criterion, opt0, scaler, -1)\n",
    "    acc1 = validate(val_loader, model, criterion_val).item()\n",
    "    print(\"start acc bn\", acc1, _count_active_conv_weights(model))\n",
    "\n",
    "    for epoch in range(epochs):\n",
    "        # Read the LR before scheduler.step(): logging it afterwards reported the\n",
    "        # next epoch's rate (0.0 on the final epoch) instead of the one used.\n",
    "        epoch_lr = optimizer.param_groups[0]['lr']\n",
    "        train_loss = train(train_loader, model, criterion, optimizer, scaler, epoch)\n",
    "        acc1 = validate(val_loader, model, criterion_val).item()\n",
    "        scheduler.step()\n",
    "\n",
    "        total_active = _count_active_conv_weights(model)\n",
    "        print(\"epoch\", epoch, train_loss, acc1, epoch_lr, total_active, total_active / total_params)\n",
    "\n",
    "    return acc1, copy.deepcopy(model.state_dict())\n",
    "\n",
    "# Run get_res with its default of 20 epochs. `acc` is the last validation\n",
    "# result it returns; `end` is the deep-copied state_dict of the final model.\n",
    "acc, end = get_res()\n",
    "\n",
    "print(\"acc\", acc)"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
