{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f553ec87",
   "metadata": {
    "tags": [
     "parameters"
    ]
   },
   "outputs": [],
   "source": [
    "# Run-level settings; this cell carries the \"parameters\" tag in its metadata.\n",
    "seed = 10\n",
    "# Read as a global by run_dsp: used as the magnitude-pruning fraction and as 1 - density.\n",
    "sparsity = 0.8\n",
    "width = 32"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6edfc04b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# Expose only CUDA device index 1 to this process; set here, before the next cell imports torch.\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fda18cc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import os\n",
    "import torchvision.transforms as transforms\n",
    "import torchvision.datasets as datasets\n",
    "import time\n",
    "import copy\n",
    "import sys\n",
    "\n",
    "import random\n",
    "import numpy as np\n",
    "import torch\n",
    "from sklearn.decomposition import PCA\n",
    "import matplotlib.pyplot as plt\n",
    "import scipy.stats as ss\n",
    "from timm.data import Mixup\n",
    "from timm.loss import SoftTargetCrossEntropy\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.init as init\n",
    "import torch.nn.functional as F\n",
    "from torch.autograd import Variable\n",
    "\n",
    "import sys\n",
    "import numpy as np\n",
    "import torch.nn.utils.prune as prune\n",
    "from datautils import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c9b3aabc",
   "metadata": {},
   "outputs": [],
   "source": [
    "def random_seed(seed=42, rank=0):\n",
    "    \"\"\"Seed torch, NumPy and the stdlib `random` module with `seed + rank`.\"\"\"\n",
    "    offset_seed = seed + rank\n",
    "    torch.manual_seed(offset_seed)\n",
    "    np.random.seed(offset_seed)\n",
    "    random.seed(offset_seed)\n",
    "\n",
    "# Seed all RNGs once for the notebook; the literal 47 is used here, not the `seed` value from the parameters cell.\n",
    "random_seed(47)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2348c12a",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_loader, val_loader = get_loaders(\n",
    "    \"imagenet\", path=\"\",\n",
    "    batchsize=256, workers=8,\n",
    "    nsamples=-1, seed=0,\n",
    "    noaug=False\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c50e599a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def train(train_loader, model, criterion, optimizer, scaler, epoch):\n",
    "    \"\"\"Run one mixed-precision training pass over `train_loader`.\n",
    "\n",
    "    Args:\n",
    "        train_loader: iterable yielding `(images, target)` batches; its\n",
    "            `len()` sizes the progress display.\n",
    "        model: module to optimise; switched to train mode here.\n",
    "        criterion: callable `criterion(output, target)` returning the loss.\n",
    "        optimizer: optimizer whose step is driven through `scaler`.\n",
    "        scaler: gradient scaler providing `scale`, `step` and `update`.\n",
    "        epoch: index shown in the progress prefix; the special value -1\n",
    "            stops the pass after batch index 50.\n",
    "\n",
    "    Returns:\n",
    "        The batch-size-weighted average loss over the processed batches.\n",
    "    \"\"\"\n",
    "    timer_batch = AverageMeter('Time', ':6.3f')\n",
    "    timer_data = AverageMeter('Data', ':6.3f')\n",
    "    loss_meter = AverageMeter('Loss', ':.4e')\n",
    "    reporter = ProgressMeter(\n",
    "        len(train_loader),\n",
    "        [timer_batch, timer_data, loss_meter],\n",
    "        prefix=\"Epoch: [{}]\".format(epoch))\n",
    "\n",
    "    model.train()\n",
    "\n",
    "    tick = time.time()\n",
    "    for step, (images, target) in enumerate(train_loader):\n",
    "        # Time spent waiting for the loader to hand over this batch.\n",
    "        timer_data.update(time.time() - tick)\n",
    "        images = images.cuda(non_blocking=True)\n",
    "        target = target.cuda(non_blocking=True)\n",
    "\n",
    "        # Forward pass and loss under autocast.\n",
    "        with torch.cuda.amp.autocast(enabled=True):\n",
    "            output = model(images)\n",
    "            loss = criterion(output, target)\n",
    "\n",
    "        loss_meter.update(loss.item(), images.size(0))\n",
    "\n",
    "        # Backward pass and parameter update, both routed through the scaler.\n",
    "        optimizer.zero_grad()\n",
    "        scaler.scale(loss).backward()\n",
    "        scaler.step(optimizer)\n",
    "        scaler.update()\n",
    "\n",
    "        # Wall-clock time for the whole iteration, loading included.\n",
    "        timer_batch.update(time.time() - tick)\n",
    "        tick = time.time()\n",
    "\n",
    "        if step % 50 == 0:\n",
    "            reporter.display(step)\n",
    "        if epoch == -1 and step == 50:\n",
    "            break\n",
    "\n",
    "    return loss_meter.avg\n",
    "\n",
    "\n",
    "def validate(val_loader, model, criterion):\n",
    "    \"\"\"Evaluate `model` on `val_loader` and return the average top-1 accuracy.\n",
    "\n",
    "    Args:\n",
    "        val_loader: iterable yielding `(images, target)` batches; its `len()`\n",
    "            sizes the progress display.\n",
    "        model: module to evaluate; switched to eval mode here.\n",
    "        criterion: callable `criterion(output, target)` returning the loss.\n",
    "\n",
    "    Returns:\n",
    "        The batch-size-weighted running average of the top-1 values produced\n",
    "        by `accuracy` (whatever type those values have).\n",
    "    \"\"\"\n",
    "    timer_batch = AverageMeter('Time', ':6.3f')\n",
    "    loss_meter = AverageMeter('Loss', ':.4e')\n",
    "    acc1_meter = AverageMeter('Acc@1', ':6.2f')\n",
    "    acc5_meter = AverageMeter('Acc@5', ':6.2f')\n",
    "    reporter = ProgressMeter(\n",
    "        len(val_loader),\n",
    "        [timer_batch, loss_meter, acc1_meter, acc5_meter],\n",
    "        prefix='Test: ')\n",
    "\n",
    "    model.eval()\n",
    "\n",
    "    with torch.no_grad():\n",
    "        tick = time.time()\n",
    "        for step, (images, target) in enumerate(val_loader):\n",
    "            images = images.cuda(non_blocking=True)\n",
    "            target = target.cuda(non_blocking=True)\n",
    "\n",
    "            output = model(images)\n",
    "            loss = criterion(output, target)\n",
    "\n",
    "            # Fold this batch into the running statistics, weighted by its size.\n",
    "            top1_acc, top5_acc = accuracy(output, target, topk=(1, 5))\n",
    "            loss_meter.update(loss.item(), images.size(0))\n",
    "            acc1_meter.update(top1_acc[0], images.size(0))\n",
    "            acc5_meter.update(top5_acc[0], images.size(0))\n",
    "\n",
    "            timer_batch.update(time.time() - tick)\n",
    "            tick = time.time()\n",
    "\n",
    "            if step % 50 == 0:\n",
    "                reporter.display(step)\n",
    "\n",
    "        # TODO: report the final Acc@1 / Acc@5 summary through the ProgressMeter as well.\n",
    "\n",
    "    return acc1_meter.avg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "698abd39",
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):\n",
    "    \"\"\"Serialise `state` to `filename` and optionally keep a best-model copy.\n",
    "\n",
    "    Args:\n",
    "        state: object handed to `torch.save`.\n",
    "        is_best: when true, `filename` is additionally copied to\n",
    "            'model_best.pth.tar' in the current working directory.\n",
    "        filename: destination path of the checkpoint.\n",
    "    \"\"\"\n",
    "    # shutil is not among the notebook's explicit imports; importing it here\n",
    "    # keeps the is_best branch from depending on a star-import providing it.\n",
    "    import shutil\n",
    "\n",
    "    torch.save(state, filename)\n",
    "    if is_best:\n",
    "        shutil.copyfile(filename, 'model_best.pth.tar')\n",
    "\n",
    "\n",
    "class AverageMeter(object):\n",
    "    \"\"\"Tracks the most recent value and the weighted running mean of a metric.\"\"\"\n",
    "\n",
    "    def __init__(self, name, fmt=':f'):\n",
    "        self.name, self.fmt = name, fmt\n",
    "        self.reset()\n",
    "\n",
    "    def reset(self):\n",
    "        \"\"\"Zero the latest value, the mean, the running sum and the count.\"\"\"\n",
    "        self.val = self.avg = self.sum = self.count = 0\n",
    "\n",
    "    def update(self, val, n=1):\n",
    "        \"\"\"Record `val` as observed `n` times and refresh the mean.\"\"\"\n",
    "        self.val = val\n",
    "        self.sum += val * n\n",
    "        self.count += n\n",
    "        self.avg = self.sum / self.count\n",
    "\n",
    "    def __str__(self):\n",
    "        # Splice the per-meter format spec into both the value and mean slots.\n",
    "        template = ''.join(['{name} {val', self.fmt, '} ({avg', self.fmt, '})'])\n",
    "        return template.format(**self.__dict__)\n",
    "\n",
    "\n",
    "class ProgressMeter(object):\n",
    "    \"\"\"Prints one tab-separated progress line: prefix plus batch counter, then each meter.\"\"\"\n",
    "\n",
    "    def __init__(self, num_batches, meters, prefix=\"\"):\n",
    "        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)\n",
    "        self.meters = meters\n",
    "        self.prefix = prefix\n",
    "\n",
    "    def display(self, batch):\n",
    "        \"\"\"Print the counter for `batch` followed by `str()` of every meter.\"\"\"\n",
    "        header = self.prefix + self.batch_fmtstr.format(batch)\n",
    "        print('\\t'.join([header] + [str(meter) for meter in self.meters]))\n",
    "\n",
    "    def _get_batch_fmtstr(self, num_batches):\n",
    "        \"\"\"Build '[{:Nd}/total]' where N is the digit count of `num_batches`.\"\"\"\n",
    "        digits = str(len(str(num_batches // 1)))\n",
    "        slot = ''.join(['{:', digits, 'd}'])\n",
    "        return ''.join(['[', slot, '/', slot.format(num_batches), ']'])\n",
    "\n",
    "\n",
    "def adjust_learning_rate(optimizer, epoch):\n",
    "    \"\"\"Apply a step schedule: the base rate `LR` divided by 10 for every 30 full epochs.\n",
    "\n",
    "    `LR` is read from module scope; the result is written to every entry of\n",
    "    `optimizer.param_groups`.\n",
    "    \"\"\"\n",
    "    decayed = LR * (0.1 ** (epoch // 30))\n",
    "    for group in optimizer.param_groups:\n",
    "        group['lr'] = decayed\n",
    "\n",
    "\n",
    "def accuracy(output, target, topk=(1,)):\n",
    "    \"\"\"Return, for each k in `topk`, the percentage of rows whose target is among the k highest scores.\n",
    "\n",
    "    Args:\n",
    "        output: score tensor; the top-k search runs along dimension 1.\n",
    "        target: label tensor; its first dimension gives the sample count.\n",
    "        topk: iterable of k values to evaluate.\n",
    "\n",
    "    Returns:\n",
    "        A list with one single-element tensor per entry of `topk`.\n",
    "    \"\"\"\n",
    "    with torch.no_grad():\n",
    "        largest_k = max(topk)\n",
    "        n_samples = target.size(0)\n",
    "\n",
    "        # Index matrix with one row per rank (best guess first), one column per sample.\n",
    "        top_indices = output.topk(largest_k, 1, True, True)[1].t()\n",
    "        hits = top_indices.eq(target.view(1, -1).expand_as(top_indices))\n",
    "\n",
    "        return [\n",
    "            hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(100.0 / n_samples)\n",
    "            for k in topk\n",
    "        ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "76d49f44",
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_other2(A, W, nnz, Z, U, print_sc=None, debug=False, reg=0, rho_start=0.03, iters=5, prune_iters=2):\n",
    "    \"\"\"Refine a sparse `Z` so that `A @ Z` approximates `W`, keeping `A` fixed.\n",
    "\n",
    "    Alternates a ridge-regularised least-squares solve with magnitude-based\n",
    "    masking (this looks like an ADMM scheme with `U` as the scaled dual\n",
    "    variable - confirm against the method this notebook implements).\n",
    "    Internally the columns of `A` are rescaled to near unit norm; `Z` and `U`\n",
    "    are moved into that scaled space on entry and back again on return.\n",
    "\n",
    "    Args:\n",
    "        A: 2-D tensor, the fixed left factor.\n",
    "        W: target tensor; `A.T @ W` must be defined.\n",
    "        nnz: requested number of non-zeros, converted to a sparsity ratio\n",
    "            that is capped at 0.99.\n",
    "        Z: warm-start sparse estimate of the solution.\n",
    "        U: warm-start accumulated residual, same shape as `Z`.\n",
    "        print_sc: callable invoked on reconstructions when `debug` is true.\n",
    "        debug: print per-iteration diagnostics.\n",
    "        reg: multiple of the mean Gram diagonal added to the Gram diagonal.\n",
    "        rho_start: penalty weight of the initial solve only; the loop uses rho = 1.\n",
    "        iters: number of loop iterations.\n",
    "        prune_iters: number of leading iterations that recompute the mask;\n",
    "            later iterations reuse the last mask.\n",
    "\n",
    "    Returns:\n",
    "        `(Z, U)` mapped back to the original (unnormalised) scale of `A`.\n",
    "\n",
    "    NOTE(review): `mask` and `cur_sparsity` are only assigned while\n",
    "    `itt < prune_iters`, so `prune_iters=0` with `iters > 0` would raise NameError.\n",
    "    \"\"\"\n",
    "    # Column norms of A, taken from the Gram diagonal; the epsilon guards all-zero columns.\n",
    "    XX = A.T.matmul(A)\n",
    "    norm2 = torch.diag(XX).sqrt() + 1e-8\n",
    "    An = A / norm2\n",
    "    # Gram matrix of the normalised A, plus a ridge of reg * mean(diagonal).\n",
    "    XX = An.T.matmul(An)\n",
    "    XX += torch.diag(torch.ones_like(XX.diag())) * XX.diag().mean() * reg\n",
    "    \n",
    "    #norm2 = torch.ones_like(norm2)\n",
    "    Wnn = W# * norm2.unsqueeze(1)\n",
    "    rho = 1\n",
    "    XY = An.T.matmul(Wnn)\n",
    "    # Two precomputed inverses: XXinv (rho) is reused every iteration, XXinv2 (rho_start) only for the first solve.\n",
    "    XXinv = torch.inverse(XX + torch.eye(XX.shape[1], device=XX.device)*rho)\n",
    "    XXinv2 = torch.inverse(XX + torch.eye(XX.shape[1], device=XX.device)*rho_start)\n",
    "    # Bring the warm-start variables into the normalised coordinate system (row i scaled by norm2[i]).\n",
    "    U = U * norm2.unsqueeze(1)\n",
    "    Z = Z * norm2.unsqueeze(1)\n",
    "    \n",
    "    #B = torch.linalg.solve(XX, XY)\n",
    "    # Initial dense solve, pulled towards the previous Z - U with weight rho_start.\n",
    "    B = XXinv2.matmul(XY + rho_start*(Z-U))\n",
    "    \n",
    "    #U = torch.zeros_like(B)\n",
    "    \n",
    "    #Z = B\n",
    "    \n",
    "    # Fraction of entries to zero out, never more than 99%.\n",
    "    bsparsity = min(0.99, 1 - nnz/B.numel())\n",
    "    #print(\"bs\", bsparsity)\n",
    "\n",
    "\n",
    "    for itt in range(iters):\n",
    "        if itt < prune_iters:\n",
    "            # Re-pick the support: keep entries of |B + U| strictly above the cur_sparsity quantile.\n",
    "            cur_sparsity = bsparsity# - bsparsity * (1 - (itt + 1) / iterative_prune) ** 3\n",
    "            thres = (B+U).abs().flatten().sort()[0][int(B.numel() * cur_sparsity)]\n",
    "            mask = ((B+U).abs() > thres)\n",
    "            del thres\n",
    "\n",
    "        # Projection onto the current support.\n",
    "        Z = (B + U) * mask    \n",
    "\n",
    "        # Accumulate the gap between the dense and the sparse iterate.\n",
    "        U = U + (B - Z)    \n",
    "\n",
    "        # Dense re-solve, pulled towards Z - U with weight rho.\n",
    "        B = XXinv.matmul(XY + rho*(Z-U))\n",
    "        #B = torch.linalg.solve(XX + torch.eye(XX.shape[1], device=XX.device)*rho, XY + rho*(Z-U))\n",
    "        if debug:\n",
    "            print(itt, cur_sparsity, (Z != 0).sum().item() / Z.numel())\n",
    "            print_sc(A.matmul(B / norm2.unsqueeze(1)))\n",
    "            print_sc(A.matmul(Z / norm2.unsqueeze(1)))\n",
    "            print(((An != 0).sum() + (Z != 0).sum()) / W.numel())\n",
    "            print(\"-------\")\n",
    "    if debug:\n",
    "        print(\"opt end\")\n",
    "\n",
    "    # Undo the column normalisation before handing the variables back.\n",
    "    return Z / norm2.unsqueeze(1), U / norm2.unsqueeze(1)    \n",
    "    \n",
    "def mag_prune(W, sp=0.6):\n",
    "    \"\"\"Zero the entries of `W` whose magnitude is not strictly above a cutoff.\n",
    "\n",
    "    The cutoff is the element at position `int(W.numel() * sp)` of the\n",
    "    ascending sort of `|W|`; a new tensor is returned.\n",
    "    \"\"\"\n",
    "    magnitudes = (W).abs()\n",
    "    ranked = magnitudes.flatten().sort()[0]\n",
    "    cutoff = ranked[int(W.numel() * sp)]\n",
    "    return W * (magnitudes > cutoff)\n",
    "\n",
    "def ent(p):\n",
    "    \"\"\"Binary entropy of `p` in bits: -(p*log2(p) + (1-p)*log2(1-p)).\"\"\"\n",
    "    weighted_p = p * np.log2(p)\n",
    "    weighted_q = (1-p) * np.log2(1-p)\n",
    "    return -(weighted_p + weighted_q)\n",
    "\n",
    "def factorizeT(W, XX, asp=0.16, sp=0.4, iters=40):\n",
    "    \"\"\"Approximate `W` by `Az @ Bz` with a sparse square `Az`, returning the factors transposed.\n",
    "\n",
    "    Args:\n",
    "        W: 2-D tensor to factorise.\n",
    "        XX: square tensor whose diagonal only fixes the shape of the scale\n",
    "            vector; the scale itself is replaced by ones.\n",
    "        asp: non-zero budget of `Az` as a fraction of `W.shape[0]**2`.\n",
    "        sp: combined non-zero budget of both factors as a fraction of `W.numel()`.\n",
    "        iters: number of alternating update rounds.\n",
    "\n",
    "    Returns:\n",
    "        `((Az / norm) @ Bz).T, Bz.T, (Az / norm).T`.\n",
    "    \"\"\"\n",
    "    n_rows = W.shape[0]\n",
    "    nza = int(n_rows**2 * asp)\n",
    "    nzb = int(W.numel() * sp - nza)\n",
    "\n",
    "    # Start from Az = I with a zero residual.\n",
    "    Az = torch.eye(n_rows, device=W.device)\n",
    "    Au = torch.zeros_like(Az)\n",
    "    # The XX-derived scale is overwritten with ones straight away, so W is used unscaled.\n",
    "    norm = XX.diag().sqrt().unsqueeze(1) + 1e-8\n",
    "    norm = torch.ones_like(norm)\n",
    "\n",
    "    Wn = W * norm\n",
    "\n",
    "    print(\"nz\", nza, nzb, Wn.shape)\n",
    "    # Start Bz from the magnitude-pruned target, again with a zero residual.\n",
    "    Bz = mag_prune(Wn, (1 - nzb/W.numel()))\n",
    "    Bu = torch.zeros_like(Bz)\n",
    "\n",
    "    solver_opts = dict(reg=1e-2, debug=False)\n",
    "    for itt in range(iters):\n",
    "        # Cubic ramp of the warm-start weight from 0 up to 1.\n",
    "        rho_start = min(1.0, itt / (iters-3))**3\n",
    "        # Update Az through the transposed problem, then Bz with the new Az.\n",
    "        Az_t, Au_t = find_other2(Bz.T, Wn.T, nza, Az.T, Au.T, rho_start=rho_start, **solver_opts)\n",
    "        Az, Au = Az_t.T, Au_t.T\n",
    "        Bz, Bu = find_other2(Az, Wn, nzb, Bz, Bu, rho_start=rho_start, **solver_opts)\n",
    "\n",
    "    Az_scaled = Az / norm\n",
    "    return (Az_scaled.matmul(Bz)).T, Bz.T, Az_scaled.T\n",
    "\n",
    "\n",
    "def factorizef(W, XX, asp=0.16, sp=0.4, iters=200, l_prev=None):\n",
    "    \"\"\"Approximate `W` by `Az @ Bz` with a sparse square `Az` and a sparse `Bz`.\n",
    "\n",
    "    Matrices with at least as many rows as columns are delegated to\n",
    "    `factorizeT` on the transpose.\n",
    "\n",
    "    Args:\n",
    "        W: 2-D tensor to factorise.\n",
    "        XX: square tensor whose diagonal only fixes the shape of the scale\n",
    "            vector; the scale itself is replaced by ones.\n",
    "        asp: non-zero budget of `Az` as a fraction of `W.shape[0]**2`.\n",
    "        sp: combined non-zero budget of both factors as a fraction of `W.numel()`.\n",
    "        iters: number of alternating update rounds.\n",
    "        l_prev: accepted but not used in this function.\n",
    "\n",
    "    Returns:\n",
    "        `(Az @ (Bz / norm), Az, Bz / norm)`, or the `factorizeT` result for\n",
    "        the delegated case.\n",
    "    \"\"\"\n",
    "    # Start timestamp; only the disabled timing printout would read it.\n",
    "    s_time = time.time()\n",
    "    n_rows, n_cols = W.shape[0], W.shape[1]\n",
    "    if n_rows >= n_cols:\n",
    "        return factorizeT(W.T, XX, sp=sp, asp=asp, iters=iters)\n",
    "\n",
    "    nza = int(n_rows**2 * asp)\n",
    "    nzb = int(W.numel() * sp - nza)\n",
    "    # The XX-derived scale is overwritten with ones straight away, so W is used unscaled.\n",
    "    norm = XX.diag().sqrt() + 1e-8\n",
    "    norm = torch.ones_like(norm)\n",
    "\n",
    "    Wn = W * norm\n",
    "\n",
    "    # Start from Az = I with a zero residual.\n",
    "    Az = torch.eye(n_rows, device=W.device)\n",
    "    Au = torch.zeros_like(Az)\n",
    "\n",
    "    print(\"nz\", nza, nzb, Wn.shape)\n",
    "    # Start Bz from the magnitude-pruned target, again with a zero residual.\n",
    "    Bz = mag_prune(Wn, (1 - nzb/W.numel()))\n",
    "    Bu = torch.zeros_like(Bz)\n",
    "\n",
    "    solver_opts = dict(reg=1e-2, debug=False)\n",
    "    for itt in range(iters):\n",
    "        # Cubic ramp of the warm-start weight from 0 up to 1.\n",
    "        rho_start = min(1.0, itt / (iters-3))**3\n",
    "        # Update Az through the transposed problem, then Bz with the new Az.\n",
    "        Az_t, Au_t = find_other2(Bz.T, Wn.T, nza, Az.T, Au.T, rho_start=rho_start, **solver_opts)\n",
    "        Az, Au = Az_t.T, Au_t.T\n",
    "        Bz, Bu = find_other2(Az, Wn, nzb, Bz, Bu, rho_start=rho_start, **solver_opts)\n",
    "\n",
    "    Bz_scaled = Bz / norm\n",
    "    return Az.matmul(Bz_scaled), Az, Bz_scaled\n",
    "\n",
    "def factorize(XX, W, sp, l_prev=None):\n",
    "    \"\"\"Factorise `W` into two sparse matrices and balance their scales.\n",
    "\n",
    "    Args:\n",
    "        XX: square tensor used to weight the printed reconstruction error\n",
    "            and forwarded to `factorizef`.\n",
    "        W: weight tensor; detached and cast to float before use.\n",
    "        sp: combined density budget; the square factor gets `max(0.05, sp/2)`.\n",
    "        l_prev: forwarded unchanged to `factorizef`.\n",
    "\n",
    "    Returns:\n",
    "        `(W2, (Ab, Bb))` where `W2 = Ab @ Bb` is the reconstruction.\n",
    "    \"\"\"\n",
    "    W = W.detach().float()\n",
    "    _, Ab, Bb = factorizef(W, XX, sp=sp, asp=max(0.05, sp/2), l_prev=l_prev)\n",
    "\n",
    "    # Rescale in place so that column j of Ab and row j of Bb end up with equal\n",
    "    # norms; the product Ab @ Bb is unaffected by this rebalancing.\n",
    "    col_norms = Ab.norm(dim=0) + 1e-12\n",
    "    row_norms = Bb.norm(dim=1) + 1e-12\n",
    "    Ab.mul_((row_norms/col_norms).sqrt())\n",
    "    Bb.mul_((col_norms/row_norms).sqrt().unsqueeze(1))\n",
    "\n",
    "    W2 = Ab.matmul(Bb)\n",
    "    residual = W2 - W\n",
    "    weighted_err = residual.matmul(XX).matmul(residual.T).diag().sum().item()\n",
    "    print(\"err_prefin\", weighted_err, W.abs().amax().item(), Ab.abs().amax().item(), Bb.abs().amax().item())\n",
    "    nonzeros = ((Ab != 0).sum() + (Bb != 0).sum()).item()\n",
    "    print(\"sparsity check\", nonzeros / W2.numel())\n",
    "    return W2, (Ab, Bb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "d1aab8b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def hook(m, *args, **kwargs):\n",
    "    \"\"\"Forward pre-hook: rebuild `m.weight` as the trainable weights times the fixed mask.\"\"\"\n",
    "    m.weight = m.wo * m.mask\n",
    "\n",
    "def add_mask(m):\n",
    "    \"\"\"Freeze the current zero pattern of `m.weight` for all later forward passes.\n",
    "\n",
    "    The existing weight parameter is re-registered under the name `wo`, a 0/1\n",
    "    float mask of its non-zero entries is stored as the buffer `mask`, the\n",
    "    `weight` parameter entry is removed, and `hook` is installed so that\n",
    "    `m.weight` is recomputed as `wo * mask` before every forward call.\n",
    "\n",
    "    Args:\n",
    "        m: module owning a `weight` parameter; modified in place.\n",
    "    \"\"\"\n",
    "    m.register_parameter(\"wo\", m.weight)\n",
    "    # Register a plain tensor: the mask is constant, so it must not be an\n",
    "    # nn.Parameter (which would require grad and receive a gradient on every\n",
    "    # backward pass through wo * mask).\n",
    "    m.register_buffer(\"mask\", (m.weight.data != 0).to(torch.float32))\n",
    "    del m._parameters[\"weight\"]\n",
    "    m.register_forward_pre_hook(hook)\n",
    "\n",
    "\n",
    "def run_dsp(model):\n",
    "    \"\"\"Replace the eligible convolutions of `model` by pairs of sparse, masked convolutions.\n",
    "\n",
    "    Works in three passes over `model.named_modules()`:\n",
    "      1. every module whose type is exactly nn.Conv2d and that has more than\n",
    "         3 input channels gets its flattened weight factorised by `factorize`\n",
    "         at density `1 - sparsity` (`sparsity` is read from module scope);\n",
    "      2. the reconstructed weight is written back and the factor pair is\n",
    "         attached to the weight tensor as `.facts`;\n",
    "      3. in every module whose type string contains \"Bottleneck\", conv1,\n",
    "         conv2, conv3 and (when present) downsample[0] are swapped for an\n",
    "         nn.Sequential of two bias-free convolutions loaded with the factors\n",
    "         and masked via `add_mask`. The replaced layers stay reachable as\n",
    "         conv1b / conv2b / conv3b / sb.\n",
    "\n",
    "    Args:\n",
    "        model: network to convert; modified in place.\n",
    "\n",
    "    Returns:\n",
    "        The same `model` object.\n",
    "    \"\"\"\n",
    "    # Pass 1: factorise each eligible conv weight; keyed by module name.\n",
    "    out_admm = {}\n",
    "    for n, m in model.named_modules():\n",
    "        if type(m) == nn.Conv2d and m.weight.shape[1] > 3:\n",
    "            density = 1 - sparsity\n",
    "            w_orig = m.weight.flatten(1)\n",
    "            # Magnitude-pruned baseline, used only in the comparison printed below.\n",
    "            w_mag = mag_prune(w_orig, sparsity)\n",
    "            # An identity matrix is passed as XX.\n",
    "            w_admm, facts = factorize(torch.eye(w_orig.shape[1], device=w_orig.device), w_orig, density)\n",
    "            out_admm[n] = (w_admm.reshape(w_orig.shape), facts)\n",
    "            # name, squared error of the factorisation, squared error of the baseline, squared norm of the weight\n",
    "            print(n, (w_admm - w_orig).square().sum().item(), (w_mag - w_orig).square().sum().item(), w_orig.square().sum().item())\n",
    "            #m.XX = None\n",
    "\n",
    "    # Pass 2: store the reconstruction in the layer and hang the factors on the weight tensor.\n",
    "    for n, m in model.named_modules():\n",
    "        if n in out_admm:\n",
    "            print(\"change\", n)\n",
    "            m.weight.data = out_admm[n][0].reshape(m.weight.shape)\n",
    "            m.weight.facts = out_admm[n][1]\n",
    "    \n",
    "    # Pass 3: rebuild each bottleneck block from the stored factors.\n",
    "    # In every pair below, facts[1] is loaded into the first conv and facts[0] into the second.\n",
    "    for n, m in model.named_modules():\n",
    "        if \"Bottleneck\" in str(type(m)):\n",
    "            print(m.conv1.weight.shape, m.conv1.weight.facts[0].shape, m.conv1.weight.facts[1].shape)\n",
    "            if True:\n",
    "                # conv1: 1x1 (in -> out) followed by 1x1 (out -> out).\n",
    "                ff = m.conv1.weight.facts\n",
    "                m.conv1b = m.conv1\n",
    "                m.conv1 = nn.Sequential(\n",
    "                    nn.Conv2d(m.conv1b.in_channels, m.conv1b.out_channels, 1, bias=False),\n",
    "                    nn.Conv2d(m.conv1b.out_channels, m.conv1b.out_channels, 1, bias=False)\n",
    "                )\n",
    "                m.conv1[0].weight.data = ff[1].reshape(m.conv1[0].weight.shape)\n",
    "                m.conv1[1].weight.data = ff[0].reshape(m.conv1[1].weight.shape)\n",
    "                m.conv1.cuda()\n",
    "                add_mask(m.conv1[0])\n",
    "                add_mask(m.conv1[1])\n",
    "                \n",
    "            print(m.conv2.weight.shape, m.conv2.weight.facts[0].shape, m.conv2.weight.facts[1].shape)\n",
    "            \n",
    "            if True:\n",
    "                # conv2: 3x3 with padding 1 and the original stride (in -> out), then 1x1 (out -> out).\n",
    "                ff = m.conv2.weight.facts\n",
    "                m.conv2b = m.conv2\n",
    "                m.conv2 = nn.Sequential(\n",
    "                    nn.Conv2d(m.conv2b.in_channels, m.conv2b.out_channels, 3, padding=1, stride=m.conv2b.stride, bias=False),\n",
    "                    nn.Conv2d(m.conv2b.out_channels, m.conv2b.out_channels, 1, bias=False)\n",
    "                )\n",
    "                #m.conv2[0].register_forward_hook(boo)\n",
    "                m.conv2[0].weight.data = ff[1].reshape(m.conv2[0].weight.shape)\n",
    "                m.conv2[1].weight.data = ff[0].reshape(m.conv2[1].weight.shape)\n",
    "                m.conv2.cuda()\n",
    "                add_mask(m.conv2[0])\n",
    "                add_mask(m.conv2[1])\n",
    "                \n",
    "            if True:\n",
    "                # conv3: 1x1 (in -> in) followed by 1x1 (in -> out).\n",
    "                ff = m.conv3.weight.facts\n",
    "                m.conv3b = m.conv3\n",
    "                m.conv3 = nn.Sequential(\n",
    "                    nn.Conv2d(m.conv3b.in_channels, m.conv3b.in_channels, 1, bias=False),\n",
    "                    nn.Conv2d(m.conv3b.in_channels, m.conv3b.out_channels, 1, bias=False)\n",
    "                )\n",
    "                m.conv3[0].weight.data = ff[1].reshape(m.conv3[0].weight.shape)\n",
    "                m.conv3[1].weight.data = ff[0].reshape(m.conv3[1].weight.shape)\n",
    "                m.conv3.cuda()\n",
    "                add_mask(m.conv3[0])\n",
    "                add_mask(m.conv3[1])\n",
    "            \n",
    "            if m.downsample is not None:\n",
    "                # downsample[0]: 1x1 with the original stride (in -> in), then 1x1 (in -> out).\n",
    "                print(m.downsample[0].weight.shape, m.downsample[0].weight.facts[0].shape, m.downsample[0].weight.facts[1].shape)\n",
    "                m.sb = m.downsample[0]\n",
    "                ff = m.sb.weight.facts\n",
    "                m.downsample[0] = nn.Sequential(\n",
    "                    nn.Conv2d(m.sb.in_channels, m.sb.in_channels, 1, stride=m.sb.stride, bias=False),\n",
    "                    nn.Conv2d(m.sb.in_channels, m.sb.out_channels, 1, bias=False)\n",
    "                )\n",
    "                #m.conv2[0].register_forward_hook(boo)\n",
    "                m.downsample[0][0].weight.data = ff[1].reshape(m.downsample[0][0].weight.shape)\n",
    "                m.downsample[0][1].weight.data = ff[0].reshape(m.downsample[0][1].weight.shape)\n",
    "                m.downsample.cuda()\n",
    "                add_mask(m.downsample[0][0])\n",
    "                add_mask(m.downsample[0][1])\n",
    "                \n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "dba446b7",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tot 23445504\n",
      "nz 409 410 torch.Size([64, 64])\n",
      "err_prefin 1.0651941299438477 0.7266281247138977 0.8041678667068481 0.8065056204795837\n",
      "sparsity check 0.199462890625\n",
      "layer1.0.conv1 1.0651941299438477 2.076427459716797 20.62295150756836\n",
      "nz 409 6963 torch.Size([64, 576])\n",
      "err_prefin 2.040290594100952 0.46786433458328247 1.0558756589889526 0.4248051643371582\n",
      "sparsity check 0.1999240451388889\n",
      "layer1.0.conv2 2.0402908325195312 3.9869112968444824 31.344758987426758\n",
      "nz 409 2867 torch.Size([64, 256])\n",
      "err_prefin 1.1036280393600464 0.3936349153518677 0.47855135798454285 0.9105990529060364\n",
      "sparsity check 0.1998291015625\n",
      "layer1.0.conv3 1.1036280393600464 2.6578125953674316 20.379261016845703\n",
      "nz 409 2867 torch.Size([64, 256])\n",
      "err_prefin 2.736938953399658 0.987881064414978 0.7314780354499817 1.3047504425048828\n",
      "sparsity check 0.1998291015625\n",
      "layer1.0.downsample.0 2.736938953399658 5.378649711608887 54.36511993408203\n",
      "nz 409 2867 torch.Size([64, 256])\n",
      "err_prefin 1.4511997699737549 0.2617597281932831 0.8334307074546814 0.37900516390800476\n",
      "sparsity check 0.1998291015625\n",
      "layer1.1.conv1 1.4511997699737549 2.9557154178619385 15.39460277557373\n",
      "nz 409 6963 torch.Size([64, 576])\n",
      "err_prefin 4.019418716430664 0.5201045870780945 1.012279987335205 0.4952540993690491\n",
      "sparsity check 0.1999240451388889\n",
      "layer1.1.conv2 4.019418716430664 6.249660491943359 30.597768783569336\n",
      "nz 409 2867 torch.Size([64, 256])\n",
      "err_prefin 1.4144227504730225 0.29462704062461853 0.42712703347206116 0.9251440167427063\n",
      "sparsity check 0.1998291015625\n",
      "layer1.1.conv3 1.4144227504730225 2.5048348903656006 17.34823989868164\n",
      "nz 409 2867 torch.Size([64, 256])\n",
      "err_prefin 1.857330560684204 0.19206704199314117 0.6339094638824463 0.28072094917297363\n",
      "sparsity check 0.1998291015625\n",
      "layer1.2.conv1 1.857330560684204 3.534888505935669 14.82396125793457\n",
      "nz 409 6963 torch.Size([64, 576])\n",
      "err_prefin 6.531692028045654 0.2855665683746338 0.8047491312026978 0.345925509929657\n",
      "sparsity check 0.1999240451388889\n",
      "layer1.2.conv2 6.531692028045654 10.0328369140625 37.246177673339844\n",
      "nz 409 2867 torch.Size([64, 256])\n",
      "err_prefin 0.9858819842338562 0.2751551568508148 0.4340672492980957 0.9231433272361755\n",
      "sparsity check 0.1998291015625\n",
      "layer1.2.conv3 0.9858819246292114 1.7647202014923096 15.938384056091309\n",
      "nz 1638 4915 torch.Size([128, 256])\n",
      "err_prefin 3.687173366546631 0.3531537353992462 0.7023606896400452 0.47062429785728455\n",
      "sparsity check 0.199920654296875\n",
      "layer2.0.conv1 3.687173366546631 8.640190124511719 41.18355941772461\n",
      "nz 1638 27853 torch.Size([128, 1152])\n",
      "err_prefin 12.09605598449707 0.29927510023117065 0.859624981880188 0.3064732551574707\n",
      "sparsity check 0.1999850802951389\n",
      "layer2.0.conv2 12.09605598449707 19.553911209106445 73.3001937866211\n",
      "nz 1638 11469 torch.Size([128, 512])\n",
      "err_prefin 4.07205057144165 0.3916732370853424 0.4524252116680145 0.8655524253845215\n",
      "sparsity check 0.1999664306640625\n",
      "layer2.0.conv3 4.072050094604492 7.458742141723633 51.68487548828125\n",
      "nz 6553 19661 torch.Size([256, 512])\n",
      "err_prefin 2.9358391761779785 0.5662445425987244 0.42682960629463196 0.789291501045227\n",
      "sparsity check 0.199981689453125\n",
      "layer2.0.downsample.0 2.9358391761779785 8.832176208496094 70.04782104492188\n",
      "nz 1638 11469 torch.Size([128, 512])\n",
      "err_prefin 0.7670196294784546 0.25216183066368103 0.629206657409668 0.3174017667770386\n",
      "sparsity check 0.1999664306640625\n",
      "layer2.1.conv1 0.7670196294784546 2.3508172035217285 18.187597274780273\n",
      "nz 1638 27853 torch.Size([128, 1152])\n",
      "err_prefin 3.09183406829834 0.2997848093509674 0.7639109492301941 0.3377540409564972\n",
      "sparsity check 0.1999850802951389\n",
      "layer2.1.conv2 3.09183406829834 8.421213150024414 55.13344192504883\n",
      "nz 1638 11469 torch.Size([128, 512])\n",
      "err_prefin 1.368902564048767 0.30379050970077515 0.41746029257774353 0.8891639709472656\n",
      "sparsity check 0.1999664306640625\n",
      "layer2.1.conv3 1.368902564048767 2.8151402473449707 31.986560821533203\n",
      "nz 1638 11469 torch.Size([128, 512])\n",
      "err_prefin 3.813647747039795 0.23830968141555786 0.704634428024292 0.3306155800819397\n",
      "sparsity check 0.1999664306640625\n",
      "layer2.2.conv1 3.813647747039795 7.4581298828125 35.66695022583008\n",
      "nz 1638 27853 torch.Size([128, 1152])\n",
      "err_prefin 9.42346477508545 0.2555960714817047 0.8331655859947205 0.32825908064842224\n",
      "sparsity check 0.1999850802951389\n",
      "layer2.2.conv2 9.42346477508545 15.438810348510742 68.77576446533203\n",
      "nz 1638 11469 torch.Size([128, 512])\n",
      "err_prefin 5.732766151428223 0.35215842723846436 0.4598608613014221 0.7502653002738953\n",
      "sparsity check 0.1999664306640625\n",
      "layer2.2.conv3 5.732766151428223 9.775825500488281 44.91209411621094\n",
      "nz 1638 11469 torch.Size([128, 512])\n",
      "err_prefin 5.455206871032715 0.28143224120140076 0.711443305015564 0.4011164605617523\n",
      "sparsity check 0.1999664306640625\n",
      "layer2.3.conv1 5.455206394195557 10.13028621673584 38.818328857421875\n",
      "nz 1638 27853 torch.Size([128, 1152])\n",
      "err_prefin 13.522199630737305 0.2209654152393341 0.877517819404602 0.25908392667770386\n",
      "sparsity check 0.1999850802951389\n",
      "layer2.3.conv2 13.522200584411621 20.69689178466797 74.12933349609375\n",
      "nz 1638 11469 torch.Size([128, 512])\n",
      "err_prefin 4.682408332824707 0.2956201732158661 0.4240821599960327 0.7764149904251099\n",
      "sparsity check 0.1999664306640625\n",
      "layer2.3.conv3 4.682408332824707 7.681174278259277 38.846248626708984\n",
      "nz 6553 19661 torch.Size([256, 512])\n",
      "err_prefin 13.182709693908691 0.3425379693508148 0.7875776290893555 0.4518364667892456\n",
      "sparsity check 0.199981689453125\n",
      "layer3.0.conv1 13.182708740234375 28.589740753173828 124.27618408203125\n",
      "nz 6553 111411 torch.Size([256, 2304])\n",
      "err_prefin 25.442371368408203 0.2007666528224945 0.9013418555259705 0.2519935369491577\n",
      "sparsity check 0.19999525282118055\n",
      "layer3.0.conv2 25.442371368408203 45.83068084716797 177.90823364257812\n",
      "nz 6553 45875 torch.Size([256, 1024])\n",
      "err_prefin 17.660974502563477 0.32124170660972595 0.5237118601799011 0.833233654499054\n",
      "sparsity check 0.19998931884765625\n",
      "layer3.0.conv3 17.660974502563477 33.26232147216797 143.28042602539062\n",
      "nz 26214 78643 torch.Size([512, 1024])\n",
      "err_prefin 11.516027450561523 0.34601572155952454 0.3870190978050232 0.6866859197616577\n",
      "sparsity check 0.1999950408935547\n",
      "layer3.0.downsample.0 11.516027450561523 28.355655670166016 136.73268127441406\n",
      "nz 6553 45875 torch.Size([256, 1024])\n",
      "err_prefin 6.124660491943359 0.29417240619659424 0.6684677600860596 0.3856883645057678\n",
      "sparsity check 0.19998931884765625\n",
      "layer3.1.conv1 6.124660015106201 13.535725593566895 60.75559616088867\n",
      "nz 6553 111411 torch.Size([256, 2304])\n",
      "err_prefin 18.496620178222656 0.2634257674217224 0.8439440131187439 0.3619099259376526\n",
      "sparsity check 0.19999525282118055\n",
      "layer3.1.conv2 18.496618270874023 33.73115539550781 134.50469970703125\n",
      "nz 6553 45875 torch.Size([256, 1024])\n",
      "err_prefin 15.073537826538086 0.4968879222869873 0.467986524105072 1.0162488222122192\n",
      "sparsity check 0.19998931884765625\n",
      "layer3.1.conv3 15.073538780212402 25.870765686035156 108.93757629394531\n",
      "nz 6553 45875 torch.Size([256, 1024])\n",
      "err_prefin 7.746004104614258 0.2714691460132599 0.7372673749923706 0.3577894866466522\n",
      "sparsity check 0.19998931884765625\n",
      "layer3.2.conv1 7.746004104614258 15.378896713256836 65.09668731689453\n",
      "nz 6553 111411 torch.Size([256, 2304])\n",
      "err_prefin 21.30065155029297 0.21002456545829773 0.8902699947357178 0.2412547469139099\n",
      "sparsity check 0.19999525282118055\n",
      "layer3.2.conv2 21.300655364990234 36.866310119628906 133.0740509033203\n",
      "nz 6553 45875 torch.Size([256, 1024])\n",
      "err_prefin 13.256702423095703 0.3537616431713104 0.44523000717163086 0.827569305896759\n",
      "sparsity check 0.19998931884765625\n",
      "layer3.2.conv3 13.256702423095703 23.891357421875 95.56613159179688\n",
      "nz 6553 45875 torch.Size([256, 1024])\n",
      "err_prefin 11.774789810180664 0.2390037477016449 0.7688807845115662 0.3071865737438202\n",
      "sparsity check 0.19998931884765625\n",
      "layer3.3.conv1 11.774789810180664 20.986347198486328 79.00544738769531\n",
      "nz 6553 111411 torch.Size([256, 2304])\n",
      "err_prefin 24.48256492614746 0.27921295166015625 0.8470867872238159 0.3663453459739685\n",
      "sparsity check 0.19999525282118055\n",
      "layer3.3.conv2 24.482566833496094 39.07086944580078 131.74488830566406\n",
      "nz 6553 45875 torch.Size([256, 1024])\n",
      "err_prefin 13.025906562805176 0.3126426041126251 0.43526580929756165 0.8246054649353027\n",
      "sparsity check 0.19998931884765625\n",
      "layer3.3.conv3 13.025907516479492 22.438289642333984 87.29132843017578\n",
      "nz 6553 45875 torch.Size([256, 1024])\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "err_prefin 14.32923698425293 0.2721982002258301 0.7154201865196228 0.41710057854652405\n",
      "sparsity check 0.19998931884765625\n",
      "layer3.4.conv1 14.329236030578613 24.23316192626953 86.25032043457031\n",
      "nz 6553 111411 torch.Size([256, 2304])\n",
      "err_prefin 24.641550064086914 0.19188867509365082 0.8042526245117188 0.22666381299495697\n",
      "sparsity check 0.19999525282118055\n",
      "layer3.4.conv2 24.64154815673828 39.144683837890625 130.9825439453125\n",
      "nz 6553 45875 torch.Size([256, 1024])\n",
      "err_prefin 12.692995071411133 0.316133052110672 0.34903326630592346 0.7881821393966675\n",
      "sparsity check 0.19998931884765625\n",
      "layer3.4.conv3 12.692994117736816 22.032760620117188 87.05435180664062\n",
      "nz 6553 45875 torch.Size([256, 1024])\n",
      "err_prefin 18.37877655029297 0.39949774742126465 0.7821371555328369 0.43698835372924805\n",
      "sparsity check 0.19998931884765625\n",
      "layer3.5.conv1 18.3787784576416 29.850234985351562 102.01531982421875\n",
      "nz 6553 111411 torch.Size([256, 2304])\n",
      "err_prefin 25.215938568115234 0.2235630750656128 0.8415316343307495 0.2765849828720093\n",
      "sparsity check 0.19999525282118055\n",
      "layer3.5.conv2 25.215938568115234 41.364723205566406 137.49560546875\n",
      "nz 6553 45875 torch.Size([256, 1024])\n",
      "err_prefin 14.9464111328125 0.32883593440055847 0.4764803647994995 0.9302998185157776\n",
      "sparsity check 0.19998931884765625\n",
      "layer3.5.conv3 14.9464111328125 26.234996795654297 98.38701629638672\n",
      "nz 26214 78643 torch.Size([512, 1024])\n",
      "err_prefin 44.948020935058594 0.3415152430534363 0.8672410845756531 0.4331424832344055\n",
      "sparsity check 0.1999950408935547\n",
      "layer4.0.conv1 44.948020935058594 91.23966217041016 297.8857116699219\n",
      "nz 26214 445645 torch.Size([512, 4608])\n",
      "err_prefin 63.09944152832031 0.3992723524570465 0.9654605388641357 0.4039965867996216\n",
      "sparsity check 0.19999906751844618\n",
      "layer4.0.conv2 63.09944152832031 120.32373046875 372.49163818359375\n",
      "nz 26214 183501 torch.Size([512, 2048])\n",
      "err_prefin 42.43503189086914 0.3546474874019623 0.37770408391952515 0.8533205389976501\n",
      "sparsity check 0.1999979019165039\n",
      "layer4.0.conv3 42.43503189086914 73.95357513427734 245.60366821289062\n",
      "nz 104857 314573 torch.Size([1024, 2048])\n",
      "err_prefin 26.34392547607422 0.6412832736968994 0.7680420875549316 0.8119863271713257\n",
      "sparsity check 0.1999988555908203\n",
      "layer4.0.downsample.0 26.34392547607422 59.82698440551758 207.0251007080078\n",
      "nz 26214 183501 torch.Size([512, 2048])\n",
      "err_prefin 39.7100830078125 0.700333833694458 0.7904388308525085 0.7880191206932068\n",
      "sparsity check 0.1999979019165039\n",
      "layer4.1.conv1 39.710079193115234 71.75401306152344 228.15423583984375\n",
      "nz 26214 445645 torch.Size([512, 4608])\n",
      "err_prefin 68.95097351074219 0.22574764490127563 0.9398110508918762 0.2284851223230362\n",
      "sparsity check 0.19999906751844618\n",
      "layer4.1.conv2 68.95096588134766 119.9652099609375 358.32769775390625\n",
      "nz 26214 183501 torch.Size([512, 2048])\n",
      "err_prefin 40.206336975097656 0.24268335103988647 0.3163236677646637 0.6726478934288025\n",
      "sparsity check 0.1999979019165039\n",
      "layer4.1.conv3 40.206336975097656 71.25874328613281 234.42181396484375\n",
      "nz 26214 183501 torch.Size([512, 2048])\n",
      "err_prefin 64.76583099365234 0.4540838599205017 0.9419254660606384 0.3813920319080353\n",
      "sparsity check 0.1999979019165039\n",
      "layer4.2.conv1 64.76583862304688 110.56641387939453 345.0306396484375\n",
      "nz 26214 445645 torch.Size([512, 4608])\n",
      "err_prefin 40.330833435058594 0.14155906438827515 0.7333163619041443 0.17385616898536682\n",
      "sparsity check 0.19999906751844618\n",
      "layer4.2.conv2 40.330833435058594 97.54340362548828 288.8470764160156\n",
      "nz 26214 183501 torch.Size([512, 2048])\n",
      "err_prefin 29.03312110900879 0.279774934053421 0.3595486283302307 0.6411212086677551\n",
      "sparsity check 0.1999979019165039\n",
      "layer4.2.conv3 29.03312110900879 58.54290008544922 210.6678466796875\n",
      "change layer1.0.conv1\n",
      "change layer1.0.conv2\n",
      "change layer1.0.conv3\n",
      "change layer1.0.downsample.0\n",
      "change layer1.1.conv1\n",
      "change layer1.1.conv2\n",
      "change layer1.1.conv3\n",
      "change layer1.2.conv1\n",
      "change layer1.2.conv2\n",
      "change layer1.2.conv3\n",
      "change layer2.0.conv1\n",
      "change layer2.0.conv2\n",
      "change layer2.0.conv3\n",
      "change layer2.0.downsample.0\n",
      "change layer2.1.conv1\n",
      "change layer2.1.conv2\n",
      "change layer2.1.conv3\n",
      "change layer2.2.conv1\n",
      "change layer2.2.conv2\n",
      "change layer2.2.conv3\n",
      "change layer2.3.conv1\n",
      "change layer2.3.conv2\n",
      "change layer2.3.conv3\n",
      "change layer3.0.conv1\n",
      "change layer3.0.conv2\n",
      "change layer3.0.conv3\n",
      "change layer3.0.downsample.0\n",
      "change layer3.1.conv1\n",
      "change layer3.1.conv2\n",
      "change layer3.1.conv3\n",
      "change layer3.2.conv1\n",
      "change layer3.2.conv2\n",
      "change layer3.2.conv3\n",
      "change layer3.3.conv1\n",
      "change layer3.3.conv2\n",
      "change layer3.3.conv3\n",
      "change layer3.4.conv1\n",
      "change layer3.4.conv2\n",
      "change layer3.4.conv3\n",
      "change layer3.5.conv1\n",
      "change layer3.5.conv2\n",
      "change layer3.5.conv3\n",
      "change layer4.0.conv1\n",
      "change layer4.0.conv2\n",
      "change layer4.0.conv3\n",
      "change layer4.0.downsample.0\n",
      "change layer4.1.conv1\n",
      "change layer4.1.conv2\n",
      "change layer4.1.conv3\n",
      "change layer4.2.conv1\n",
      "change layer4.2.conv2\n",
      "change layer4.2.conv3\n",
      "torch.Size([64, 64, 1, 1]) torch.Size([64, 64]) torch.Size([64, 64])\n",
      "torch.Size([64, 64, 3, 3]) torch.Size([64, 64]) torch.Size([64, 576])\n",
      "torch.Size([256, 64, 1, 1]) torch.Size([256, 64]) torch.Size([64, 64])\n",
      "torch.Size([64, 256, 1, 1]) torch.Size([64, 64]) torch.Size([64, 256])\n",
      "torch.Size([64, 64, 3, 3]) torch.Size([64, 64]) torch.Size([64, 576])\n",
      "torch.Size([64, 256, 1, 1]) torch.Size([64, 64]) torch.Size([64, 256])\n",
      "torch.Size([64, 64, 3, 3]) torch.Size([64, 64]) torch.Size([64, 576])\n",
      "torch.Size([128, 256, 1, 1]) torch.Size([128, 128]) torch.Size([128, 256])\n",
      "torch.Size([128, 128, 3, 3]) torch.Size([128, 128]) torch.Size([128, 1152])\n",
      "torch.Size([512, 256, 1, 1]) torch.Size([512, 256]) torch.Size([256, 256])\n",
      "torch.Size([128, 512, 1, 1]) torch.Size([128, 128]) torch.Size([128, 512])\n",
      "torch.Size([128, 128, 3, 3]) torch.Size([128, 128]) torch.Size([128, 1152])\n",
      "torch.Size([128, 512, 1, 1]) torch.Size([128, 128]) torch.Size([128, 512])\n",
      "torch.Size([128, 128, 3, 3]) torch.Size([128, 128]) torch.Size([128, 1152])\n",
      "torch.Size([128, 512, 1, 1]) torch.Size([128, 128]) torch.Size([128, 512])\n",
      "torch.Size([128, 128, 3, 3]) torch.Size([128, 128]) torch.Size([128, 1152])\n",
      "torch.Size([256, 512, 1, 1]) torch.Size([256, 256]) torch.Size([256, 512])\n",
      "torch.Size([256, 256, 3, 3]) torch.Size([256, 256]) torch.Size([256, 2304])\n",
      "torch.Size([1024, 512, 1, 1]) torch.Size([1024, 512]) torch.Size([512, 512])\n",
      "torch.Size([256, 1024, 1, 1]) torch.Size([256, 256]) torch.Size([256, 1024])\n",
      "torch.Size([256, 256, 3, 3]) torch.Size([256, 256]) torch.Size([256, 2304])\n",
      "torch.Size([256, 1024, 1, 1]) torch.Size([256, 256]) torch.Size([256, 1024])\n",
      "torch.Size([256, 256, 3, 3]) torch.Size([256, 256]) torch.Size([256, 2304])\n",
      "torch.Size([256, 1024, 1, 1]) torch.Size([256, 256]) torch.Size([256, 1024])\n",
      "torch.Size([256, 256, 3, 3]) torch.Size([256, 256]) torch.Size([256, 2304])\n",
      "torch.Size([256, 1024, 1, 1]) torch.Size([256, 256]) torch.Size([256, 1024])\n",
      "torch.Size([256, 256, 3, 3]) torch.Size([256, 256]) torch.Size([256, 2304])\n",
      "torch.Size([256, 1024, 1, 1]) torch.Size([256, 256]) torch.Size([256, 1024])\n",
      "torch.Size([256, 256, 3, 3]) torch.Size([256, 256]) torch.Size([256, 2304])\n",
      "torch.Size([512, 1024, 1, 1]) torch.Size([512, 512]) torch.Size([512, 1024])\n",
      "torch.Size([512, 512, 3, 3]) torch.Size([512, 512]) torch.Size([512, 4608])\n",
      "torch.Size([2048, 1024, 1, 1]) torch.Size([2048, 1024]) torch.Size([1024, 1024])\n",
      "torch.Size([512, 2048, 1, 1]) torch.Size([512, 512]) torch.Size([512, 2048])\n",
      "torch.Size([512, 512, 3, 3]) torch.Size([512, 512]) torch.Size([512, 4608])\n",
      "torch.Size([512, 2048, 1, 1]) torch.Size([512, 512]) torch.Size([512, 2048])\n",
      "torch.Size([512, 512, 3, 3]) torch.Size([512, 512]) torch.Size([512, 4608])\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "ResNet(\n",
      "  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
      "  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "  (relu): ReLU(inplace=True)\n",
      "  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
      "  (layer1): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Sequential(\n",
      "          (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "          (1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        )\n",
      "        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "      (conv1b): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (sb): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "  )\n",
      "  (layer2): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Sequential(\n",
      "          (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "          (1): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        )\n",
      "        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "      (conv1b): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (sb): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (3): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "  )\n",
      "  (layer3): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Sequential(\n",
      "          (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "          (1): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        )\n",
      "        (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "      (conv1b): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (sb): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (3): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (4): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (5): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "  )\n",
      "  (layer4): Sequential(\n",
      "    (0): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (downsample): Sequential(\n",
      "        (0): Sequential(\n",
      "          (0): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "          (1): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        )\n",
      "        (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      )\n",
      "      (conv1b): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (sb): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
      "    )\n",
      "    (1): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "    (2): Bottleneck(\n",
      "      (conv1): Sequential(\n",
      "        (0): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv2): Sequential(\n",
      "        (0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (conv3): Sequential(\n",
      "        (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "        (1): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      )\n",
      "      (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
      "      (relu): ReLU(inplace=True)\n",
      "      (conv1b): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "      (conv2b): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
      "      (conv3b): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
      "    )\n",
      "  )\n",
      "  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))\n",
      "  (fc): Linear(in_features=2048, out_features=1000, bias=True)\n",
      ")\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test: [  0/196]\tTime  5.105 ( 5.105)\tLoss 1.0513e+00 (1.0513e+00)\tAcc@1  69.92 ( 69.92)\tAcc@5  94.14 ( 94.14)\n",
      "Test: [ 50/196]\tTime  0.430 ( 0.528)\tLoss 1.5610e+00 (1.3577e+00)\tAcc@1  64.45 ( 66.84)\tAcc@5  87.50 ( 88.99)\n",
      "Test: [100/196]\tTime  0.431 ( 0.481)\tLoss 2.2911e+00 (1.5393e+00)\tAcc@1  42.58 ( 63.93)\tAcc@5  77.73 ( 86.24)\n",
      "Test: [150/196]\tTime  0.432 ( 0.465)\tLoss 2.4797e+00 (1.7323e+00)\tAcc@1  48.83 ( 60.39)\tAcc@5  69.53 ( 83.22)\n",
      "start acc no bn 59.27799987792969\n",
      "Epoch: [-1][   0/5005]\tTime  3.350 ( 3.350)\tData  2.453 ( 2.453)\tLoss 8.8577e-01 (8.8577e-01)\n",
      "Epoch: [-1][  50/5005]\tTime  0.636 ( 0.692)\tData  0.000 ( 0.048)\tLoss 9.4141e-01 (9.3714e-01)\n",
      "Test: [  0/196]\tTime  3.365 ( 3.365)\tLoss 6.2836e-01 (6.2836e-01)\tAcc@1  81.25 ( 81.25)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.432 ( 0.490)\tLoss 6.3255e-01 (8.2611e-01)\tAcc@1  85.55 ( 77.88)\tAcc@5  95.31 ( 94.55)\n",
      "Test: [100/196]\tTime  0.434 ( 0.462)\tLoss 1.3621e+00 (9.5569e-01)\tAcc@1  64.84 ( 75.23)\tAcc@5  89.06 ( 93.01)\n",
      "Test: [150/196]\tTime  0.434 ( 0.453)\tLoss 1.2327e+00 (1.0817e+00)\tAcc@1  72.27 ( 72.87)\tAcc@5  86.72 ( 91.36)\n",
      "start acc bn 71.91799926757812 4688969\n",
      "Epoch: [0][   0/5005]\tTime  3.142 ( 3.142)\tData  2.493 ( 2.493)\tLoss 8.3022e-01 (8.3022e-01)\n",
      "Epoch: [0][  50/5005]\tTime  0.638 ( 0.687)\tData  0.000 ( 0.049)\tLoss 8.1046e-01 (9.4455e-01)\n",
      "Epoch: [0][ 100/5005]\tTime  0.637 ( 0.663)\tData  0.000 ( 0.025)\tLoss 1.1087e+00 (9.4796e-01)\n",
      "Epoch: [0][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.017)\tLoss 8.3573e-01 (9.6274e-01)\n",
      "Epoch: [0][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.013)\tLoss 1.0318e+00 (9.6488e-01)\n",
      "Epoch: [0][ 250/5005]\tTime  0.638 ( 0.648)\tData  0.000 ( 0.010)\tLoss 1.0617e+00 (9.6966e-01)\n",
      "Epoch: [0][ 300/5005]\tTime  0.638 ( 0.646)\tData  0.000 ( 0.008)\tLoss 9.8620e-01 (9.7059e-01)\n",
      "Epoch: [0][ 350/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.007)\tLoss 8.7454e-01 (9.7477e-01)\n",
      "Epoch: [0][ 400/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.006)\tLoss 9.2805e-01 (9.7335e-01)\n",
      "Epoch: [0][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 9.3972e-01 (9.7481e-01)\n",
      "Epoch: [0][ 500/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.005)\tLoss 1.1865e+00 (9.7476e-01)\n",
      "Epoch: [0][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.6699e-01 (9.7667e-01)\n",
      "Epoch: [0][ 600/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 1.2344e+00 (9.7703e-01)\n",
      "Epoch: [0][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.4924e-01 (9.7858e-01)\n",
      "Epoch: [0][ 700/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.7115e-01 (9.7803e-01)\n",
      "Epoch: [0][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.1032e+00 (9.7801e-01)\n",
      "Epoch: [0][ 800/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.003)\tLoss 1.2183e+00 (9.8012e-01)\n",
      "Epoch: [0][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.3017e-01 (9.7907e-01)\n",
      "Epoch: [0][ 900/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0787e+00 (9.7783e-01)\n",
      "Epoch: [0][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0600e+00 (9.7746e-01)\n",
      "Epoch: [0][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.2262e+00 (9.7682e-01)\n",
      "Epoch: [0][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0678e+00 (9.7769e-01)\n",
      "Epoch: [0][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.4785e-01 (9.7704e-01)\n",
      "Epoch: [0][1150/5005]\tTime  0.639 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.5671e-01 (9.7653e-01)\n",
      "Epoch: [0][1200/5005]\tTime  0.639 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.1801e+00 (9.7686e-01)\n",
      "Epoch: [0][1250/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.1020e-01 (9.7663e-01)\n",
      "Epoch: [0][1300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0369e+00 (9.7663e-01)\n",
      "Epoch: [0][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0483e+00 (9.7620e-01)\n",
      "Epoch: [0][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1029e+00 (9.7701e-01)\n",
      "Epoch: [0][1450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0551e+00 (9.7632e-01)\n",
      "Epoch: [0][1500/5005]\tTime  0.642 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1288e+00 (9.7611e-01)\n",
      "Epoch: [0][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9320e-01 (9.7602e-01)\n",
      "Epoch: [0][1600/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7283e-01 (9.7618e-01)\n",
      "Epoch: [0][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0307e+00 (9.7641e-01)\n",
      "Epoch: [0][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2442e-01 (9.7644e-01)\n",
      "Epoch: [0][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0856e+00 (9.7619e-01)\n",
      "Epoch: [0][1800/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0513e+00 (9.7559e-01)\n",
      "Epoch: [0][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6183e-01 (9.7537e-01)\n",
      "Epoch: [0][1900/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0316e+00 (9.7562e-01)\n",
      "Epoch: [0][1950/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.8033e-01 (9.7559e-01)\n",
      "Epoch: [0][2000/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.2172e-01 (9.7531e-01)\n",
      "Epoch: [0][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.3157e-01 (9.7528e-01)\n",
      "Epoch: [0][2100/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.6741e-01 (9.7468e-01)\n",
      "Epoch: [0][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.7333e-01 (9.7450e-01)\n",
      "Epoch: [0][2200/5005]\tTime  0.639 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.7321e-01 (9.7534e-01)\n",
      "Epoch: [0][2250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0421e+00 (9.7541e-01)\n",
      "Epoch: [0][2300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.7661e-01 (9.7527e-01)\n",
      "Epoch: [0][2350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.7689e-01 (9.7481e-01)\n",
      "Epoch: [0][2400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1330e+00 (9.7462e-01)\n",
      "Epoch: [0][2450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0918e+00 (9.7469e-01)\n",
      "Epoch: [0][2500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0406e+00 (9.7448e-01)\n",
      "Epoch: [0][2550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.2259e+00 (9.7475e-01)\n",
      "Epoch: [0][2600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.6438e-01 (9.7485e-01)\n",
      "Epoch: [0][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1010e+00 (9.7517e-01)\n",
      "Epoch: [0][2700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0822e+00 (9.7492e-01)\n",
      "Epoch: [0][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8824e-01 (9.7483e-01)\n",
      "Epoch: [0][2800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0338e+00 (9.7499e-01)\n",
      "Epoch: [0][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9828e-01 (9.7540e-01)\n",
      "Epoch: [0][2900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0260e+00 (9.7525e-01)\n",
      "Epoch: [0][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3539e-01 (9.7514e-01)\n",
      "Epoch: [0][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7077e-01 (9.7538e-01)\n",
      "Epoch: [0][3050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1554e-01 (9.7546e-01)\n",
      "Epoch: [0][3100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6953e-01 (9.7502e-01)\n",
      "Epoch: [0][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2767e-01 (9.7504e-01)\n",
      "Epoch: [0][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5422e-01 (9.7493e-01)\n",
      "Epoch: [0][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8890e-01 (9.7498e-01)\n",
      "Epoch: [0][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1138e+00 (9.7578e-01)\n",
      "Epoch: [0][3350/5005]\tTime  0.640 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0939e+00 (9.7597e-01)\n",
      "Epoch: [0][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9040e-01 (9.7594e-01)\n",
      "Epoch: [0][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2063e+00 (9.7627e-01)\n",
      "Epoch: [0][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0315e+00 (9.7615e-01)\n",
      "Epoch: [0][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8162e-01 (9.7633e-01)\n",
      "Epoch: [0][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3722e-01 (9.7581e-01)\n",
      "Epoch: [0][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2068e-01 (9.7551e-01)\n",
      "Epoch: [0][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0295e+00 (9.7627e-01)\n",
      "Epoch: [0][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0098e+00 (9.7673e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [0][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0040e+00 (9.7682e-01)\n",
      "Epoch: [0][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6017e-01 (9.7658e-01)\n",
      "Epoch: [0][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5589e-01 (9.7658e-01)\n",
      "Epoch: [0][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1872e+00 (9.7686e-01)\n",
      "Epoch: [0][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0438e+00 (9.7689e-01)\n",
      "Epoch: [0][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1677e-01 (9.7706e-01)\n",
      "Epoch: [0][4100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4533e-01 (9.7712e-01)\n",
      "Epoch: [0][4150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0578e+00 (9.7704e-01)\n",
      "Epoch: [0][4200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0298e+00 (9.7718e-01)\n",
      "Epoch: [0][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7108e-01 (9.7736e-01)\n",
      "Epoch: [0][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0268e+00 (9.7770e-01)\n",
      "Epoch: [0][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1624e+00 (9.7773e-01)\n",
      "Epoch: [0][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2090e+00 (9.7781e-01)\n",
      "Epoch: [0][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0549e-01 (9.7757e-01)\n",
      "Epoch: [0][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0561e-01 (9.7762e-01)\n",
      "Epoch: [0][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0494e+00 (9.7768e-01)\n",
      "Epoch: [0][4600/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0051e+00 (9.7785e-01)\n",
      "Epoch: [0][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6706e-01 (9.7777e-01)\n",
      "Epoch: [0][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0426e+00 (9.7786e-01)\n",
      "Epoch: [0][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1929e-01 (9.7807e-01)\n",
      "Epoch: [0][4800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8231e-01 (9.7815e-01)\n",
      "Epoch: [0][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3298e-01 (9.7819e-01)\n",
      "Epoch: [0][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9199e-01 (9.7851e-01)\n",
      "Epoch: [0][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2990e-01 (9.7851e-01)\n",
      "Epoch: [0][5000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6539e-01 (9.7846e-01)\n",
      "Test: [  0/196]\tTime  3.486 ( 3.486)\tLoss 6.2055e-01 (6.2055e-01)\tAcc@1  82.03 ( 82.03)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.493)\tLoss 3.8825e-01 (8.5672e-01)\tAcc@1  91.80 ( 77.01)\tAcc@5  98.05 ( 94.17)\n",
      "Test: [100/196]\tTime  0.434 ( 0.464)\tLoss 1.4428e+00 (9.7968e-01)\tAcc@1  62.11 ( 74.37)\tAcc@5  86.72 ( 92.81)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.2867e+00 (1.0988e+00)\tAcc@1  70.70 ( 72.23)\tAcc@5  87.89 ( 91.16)\n",
      "epoch 0 0.9784678676231862 71.40599822998047 0.0095 4688969 0.19999437845311407\n",
      "Epoch: [1][   0/5005]\tTime  3.368 ( 3.368)\tData  2.728 ( 2.728)\tLoss 9.2718e-01 (9.2718e-01)\n",
      "Epoch: [1][  50/5005]\tTime  0.637 ( 0.692)\tData  0.000 ( 0.054)\tLoss 1.0277e+00 (9.5080e-01)\n",
      "Epoch: [1][ 100/5005]\tTime  0.638 ( 0.665)\tData  0.000 ( 0.027)\tLoss 8.8217e-01 (9.4170e-01)\n",
      "Epoch: [1][ 150/5005]\tTime  0.637 ( 0.656)\tData  0.000 ( 0.018)\tLoss 8.9233e-01 (9.3289e-01)\n",
      "Epoch: [1][ 200/5005]\tTime  0.638 ( 0.651)\tData  0.000 ( 0.014)\tLoss 8.4417e-01 (9.3612e-01)\n",
      "Epoch: [1][ 250/5005]\tTime  0.637 ( 0.648)\tData  0.000 ( 0.011)\tLoss 9.1165e-01 (9.3937e-01)\n",
      "Epoch: [1][ 300/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.009)\tLoss 9.4635e-01 (9.3759e-01)\n",
      "Epoch: [1][ 350/5005]\tTime  0.638 ( 0.645)\tData  0.000 ( 0.008)\tLoss 8.8160e-01 (9.3758e-01)\n",
      "Epoch: [1][ 400/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 8.2849e-01 (9.3935e-01)\n",
      "Epoch: [1][ 450/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.006)\tLoss 9.6279e-01 (9.4033e-01)\n",
      "Epoch: [1][ 500/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 1.0716e+00 (9.4180e-01)\n",
      "Epoch: [1][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.9389e-01 (9.4220e-01)\n",
      "Epoch: [1][ 600/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.3553e-01 (9.4139e-01)\n",
      "Epoch: [1][ 650/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 1.0784e+00 (9.4084e-01)\n",
      "Epoch: [1][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.5988e-01 (9.4060e-01)\n",
      "Epoch: [1][ 750/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.4800e-01 (9.4047e-01)\n",
      "Epoch: [1][ 800/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.8329e-01 (9.3957e-01)\n",
      "Epoch: [1][ 850/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 1.0623e+00 (9.4010e-01)\n",
      "Epoch: [1][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.3871e-01 (9.4075e-01)\n",
      "Epoch: [1][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.9035e-01 (9.4025e-01)\n",
      "Epoch: [1][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.4063e-01 (9.3926e-01)\n",
      "Epoch: [1][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0535e+00 (9.3864e-01)\n",
      "Epoch: [1][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.4020e-01 (9.3835e-01)\n",
      "Epoch: [1][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.3632e-01 (9.3800e-01)\n",
      "Epoch: [1][1200/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.1505e-01 (9.3746e-01)\n",
      "Epoch: [1][1250/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8530e-01 (9.3752e-01)\n",
      "Epoch: [1][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.5319e-01 (9.3836e-01)\n",
      "Epoch: [1][1350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.7810e-01 (9.3849e-01)\n",
      "Epoch: [1][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2410e-01 (9.3848e-01)\n",
      "Epoch: [1][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0604e-01 (9.3852e-01)\n",
      "Epoch: [1][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.7364e-01 (9.3833e-01)\n",
      "Epoch: [1][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9135e-01 (9.3833e-01)\n",
      "Epoch: [1][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.4589e-01 (9.3918e-01)\n",
      "Epoch: [1][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3322e-01 (9.3899e-01)\n",
      "Epoch: [1][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0062e+00 (9.3959e-01)\n",
      "Epoch: [1][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.1078e-01 (9.3978e-01)\n",
      "Epoch: [1][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.6658e-01 (9.4016e-01)\n",
      "Epoch: [1][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0095e+00 (9.3982e-01)\n",
      "Epoch: [1][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3808e-01 (9.3963e-01)\n",
      "Epoch: [1][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.4953e-01 (9.4020e-01)\n",
      "Epoch: [1][2000/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1168e-01 (9.4064e-01)\n",
      "Epoch: [1][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1593e+00 (9.4140e-01)\n",
      "Epoch: [1][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0514e+00 (9.4218e-01)\n",
      "Epoch: [1][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.1716e+00 (9.4281e-01)\n",
      "Epoch: [1][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0853e+00 (9.4311e-01)\n",
      "Epoch: [1][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6348e-01 (9.4304e-01)\n",
      "Epoch: [1][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9767e-01 (9.4286e-01)\n",
      "Epoch: [1][2350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1314e-01 (9.4334e-01)\n",
      "Epoch: [1][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3375e-01 (9.4396e-01)\n",
      "Epoch: [1][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8505e-01 (9.4443e-01)\n",
      "Epoch: [1][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7600e-01 (9.4491e-01)\n",
      "Epoch: [1][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0661e+00 (9.4533e-01)\n",
      "Epoch: [1][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4227e-01 (9.4538e-01)\n",
      "Epoch: [1][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0199e+00 (9.4558e-01)\n",
      "Epoch: [1][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1606e-01 (9.4567e-01)\n",
      "Epoch: [1][2750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0458e+00 (9.4553e-01)\n",
      "Epoch: [1][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1959e-01 (9.4565e-01)\n",
      "Epoch: [1][2850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0032e+00 (9.4588e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [1][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5072e-01 (9.4554e-01)\n",
      "Epoch: [1][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1253e+00 (9.4584e-01)\n",
      "Epoch: [1][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1520e-01 (9.4580e-01)\n",
      "Epoch: [1][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1434e-01 (9.4604e-01)\n",
      "Epoch: [1][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0132e+00 (9.4633e-01)\n",
      "Epoch: [1][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0273e+00 (9.4666e-01)\n",
      "Epoch: [1][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1164e+00 (9.4720e-01)\n",
      "Epoch: [1][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0070e+00 (9.4699e-01)\n",
      "Epoch: [1][3300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2939e-01 (9.4681e-01)\n",
      "Epoch: [1][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3629e-01 (9.4676e-01)\n",
      "Epoch: [1][3400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9171e-01 (9.4681e-01)\n",
      "Epoch: [1][3450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0380e+00 (9.4679e-01)\n",
      "Epoch: [1][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1742e+00 (9.4704e-01)\n",
      "Epoch: [1][3550/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0214e+00 (9.4710e-01)\n",
      "Epoch: [1][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4724e-01 (9.4711e-01)\n",
      "Epoch: [1][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0903e+00 (9.4764e-01)\n",
      "Epoch: [1][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2787e-01 (9.4790e-01)\n",
      "Epoch: [1][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9363e-01 (9.4813e-01)\n",
      "Epoch: [1][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1313e+00 (9.4849e-01)\n",
      "Epoch: [1][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7358e-01 (9.4877e-01)\n",
      "Epoch: [1][3900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0650e+00 (9.4924e-01)\n",
      "Epoch: [1][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4132e-01 (9.4951e-01)\n",
      "Epoch: [1][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1627e+00 (9.4956e-01)\n",
      "Epoch: [1][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6104e-01 (9.4994e-01)\n",
      "Epoch: [1][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9411e-01 (9.5008e-01)\n",
      "Epoch: [1][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0724e+00 (9.5035e-01)\n",
      "Epoch: [1][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4999e-01 (9.5020e-01)\n",
      "Epoch: [1][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5860e-01 (9.5023e-01)\n",
      "Epoch: [1][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1351e+00 (9.5034e-01)\n",
      "Epoch: [1][4350/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1289e-01 (9.5028e-01)\n",
      "Epoch: [1][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4386e-01 (9.5043e-01)\n",
      "Epoch: [1][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2619e-01 (9.5038e-01)\n",
      "Epoch: [1][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8739e-01 (9.5075e-01)\n",
      "Epoch: [1][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0133e-01 (9.5111e-01)\n",
      "Epoch: [1][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9466e-01 (9.5129e-01)\n",
      "Epoch: [1][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0692e+00 (9.5156e-01)\n",
      "Epoch: [1][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8555e-01 (9.5168e-01)\n",
      "Epoch: [1][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0045e-01 (9.5174e-01)\n",
      "Epoch: [1][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6779e-01 (9.5203e-01)\n",
      "Epoch: [1][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2737e+00 (9.5228e-01)\n",
      "Epoch: [1][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2501e-01 (9.5231e-01)\n",
      "Epoch: [1][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0625e+00 (9.5248e-01)\n",
      "Epoch: [1][5000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1193e-01 (9.5269e-01)\n",
      "Test: [  0/196]\tTime  3.401 ( 3.401)\tLoss 5.8170e-01 (5.8170e-01)\tAcc@1  82.42 ( 82.42)\tAcc@5  96.88 ( 96.88)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.492)\tLoss 6.0999e-01 (8.5238e-01)\tAcc@1  84.38 ( 77.14)\tAcc@5  96.88 ( 94.25)\n",
      "Test: [100/196]\tTime  0.434 ( 0.463)\tLoss 1.5837e+00 (9.7979e-01)\tAcc@1  55.47 ( 74.39)\tAcc@5  87.11 ( 92.62)\n",
      "Test: [150/196]\tTime  0.434 ( 0.453)\tLoss 1.1371e+00 (1.1081e+00)\tAcc@1  73.05 ( 71.96)\tAcc@5  88.67 ( 90.93)\n",
      "epoch 1 0.952634817545056 71.06399536132812 0.009000000000000001 4688969 0.19999437845311407\n",
      "Epoch: [2][   0/5005]\tTime  3.035 ( 3.035)\tData  2.396 ( 2.396)\tLoss 9.4341e-01 (9.4341e-01)\n",
      "Epoch: [2][  50/5005]\tTime  0.637 ( 0.685)\tData  0.000 ( 0.047)\tLoss 1.0202e+00 (9.0006e-01)\n",
      "Epoch: [2][ 100/5005]\tTime  0.638 ( 0.661)\tData  0.000 ( 0.024)\tLoss 1.0306e+00 (8.9563e-01)\n",
      "Epoch: [2][ 150/5005]\tTime  0.638 ( 0.653)\tData  0.000 ( 0.016)\tLoss 7.7377e-01 (8.9714e-01)\n",
      "Epoch: [2][ 200/5005]\tTime  0.637 ( 0.649)\tData  0.000 ( 0.012)\tLoss 9.4552e-01 (9.0821e-01)\n",
      "Epoch: [2][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 8.5887e-01 (9.0656e-01)\n",
      "Epoch: [2][ 300/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 9.3070e-01 (9.0921e-01)\n",
      "Epoch: [2][ 350/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.007)\tLoss 9.8608e-01 (9.1096e-01)\n",
      "Epoch: [2][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.1457e-01 (9.1225e-01)\n",
      "Epoch: [2][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 9.7846e-01 (9.1115e-01)\n",
      "Epoch: [2][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.7954e-01 (9.1132e-01)\n",
      "Epoch: [2][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.3727e-01 (9.1132e-01)\n",
      "Epoch: [2][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.8146e-01 (9.1253e-01)\n",
      "Epoch: [2][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.3207e-01 (9.1245e-01)\n",
      "Epoch: [2][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.9603e-01 (9.1513e-01)\n",
      "Epoch: [2][ 750/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.9615e-01 (9.1544e-01)\n",
      "Epoch: [2][ 800/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.9909e-01 (9.1660e-01)\n",
      "Epoch: [2][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.9739e-01 (9.1828e-01)\n",
      "Epoch: [2][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.2259e+00 (9.1974e-01)\n",
      "Epoch: [2][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.7506e-01 (9.2087e-01)\n",
      "Epoch: [2][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.5516e-01 (9.2200e-01)\n",
      "Epoch: [2][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6495e-01 (9.2115e-01)\n",
      "Epoch: [2][1100/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7473e-01 (9.2060e-01)\n",
      "Epoch: [2][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.7322e-01 (9.2125e-01)\n",
      "Epoch: [2][1200/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0172e+00 (9.2221e-01)\n",
      "Epoch: [2][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.6409e-01 (9.2269e-01)\n",
      "Epoch: [2][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.6161e-01 (9.2403e-01)\n",
      "Epoch: [2][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0778e-01 (9.2450e-01)\n",
      "Epoch: [2][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0543e+00 (9.2464e-01)\n",
      "Epoch: [2][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0589e+00 (9.2462e-01)\n",
      "Epoch: [2][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3921e-01 (9.2351e-01)\n",
      "Epoch: [2][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4922e-01 (9.2406e-01)\n",
      "Epoch: [2][1600/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0579e+00 (9.2460e-01)\n",
      "Epoch: [2][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4697e-01 (9.2538e-01)\n",
      "Epoch: [2][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0863e+00 (9.2540e-01)\n",
      "Epoch: [2][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8530e-01 (9.2577e-01)\n",
      "Epoch: [2][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1087e-01 (9.2564e-01)\n",
      "Epoch: [2][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6107e-01 (9.2553e-01)\n",
      "Epoch: [2][1900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0567e+00 (9.2578e-01)\n",
      "Epoch: [2][1950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8908e-01 (9.2591e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [2][2000/5005]\tTime  0.643 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9436e-01 (9.2615e-01)\n",
      "Epoch: [2][2050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0446e-01 (9.2628e-01)\n",
      "Epoch: [2][2100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6251e-01 (9.2641e-01)\n",
      "Epoch: [2][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0792e+00 (9.2630e-01)\n",
      "Epoch: [2][2200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8186e-01 (9.2608e-01)\n",
      "Epoch: [2][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5682e-01 (9.2584e-01)\n",
      "Epoch: [2][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4813e-01 (9.2641e-01)\n",
      "Epoch: [2][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6138e-01 (9.2646e-01)\n",
      "Epoch: [2][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0520e+00 (9.2665e-01)\n",
      "Epoch: [2][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6660e-01 (9.2666e-01)\n",
      "Epoch: [2][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1431e-01 (9.2642e-01)\n",
      "Epoch: [2][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1758e+00 (9.2670e-01)\n",
      "Epoch: [2][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2507e-01 (9.2681e-01)\n",
      "Epoch: [2][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4580e-01 (9.2690e-01)\n",
      "Epoch: [2][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1433e-01 (9.2716e-01)\n",
      "Epoch: [2][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0540e-01 (9.2804e-01)\n",
      "Epoch: [2][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1332e+00 (9.2832e-01)\n",
      "Epoch: [2][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0637e+00 (9.2850e-01)\n",
      "Epoch: [2][2900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7814e-01 (9.2898e-01)\n",
      "Epoch: [2][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0070e+00 (9.2952e-01)\n",
      "Epoch: [2][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8837e-01 (9.2976e-01)\n",
      "Epoch: [2][3050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0084e+00 (9.2998e-01)\n",
      "Epoch: [2][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0574e-01 (9.3021e-01)\n",
      "Epoch: [2][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0328e+00 (9.3046e-01)\n",
      "Epoch: [2][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7267e-01 (9.3050e-01)\n",
      "Epoch: [2][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4825e-01 (9.3051e-01)\n",
      "Epoch: [2][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7987e-01 (9.3066e-01)\n",
      "Epoch: [2][3350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8190e-01 (9.3098e-01)\n",
      "Epoch: [2][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0459e+00 (9.3122e-01)\n",
      "Epoch: [2][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4537e-01 (9.3099e-01)\n",
      "Epoch: [2][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5632e-01 (9.3138e-01)\n",
      "Epoch: [2][3550/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6506e-01 (9.3185e-01)\n",
      "Epoch: [2][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2356e-01 (9.3197e-01)\n",
      "Epoch: [2][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7301e-01 (9.3221e-01)\n",
      "Epoch: [2][3700/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1763e-01 (9.3259e-01)\n",
      "Epoch: [2][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2762e-01 (9.3280e-01)\n",
      "Epoch: [2][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1807e-01 (9.3305e-01)\n",
      "Epoch: [2][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5000e-01 (9.3321e-01)\n",
      "Epoch: [2][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7766e-01 (9.3325e-01)\n",
      "Epoch: [2][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8545e-01 (9.3335e-01)\n",
      "Epoch: [2][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9129e-01 (9.3341e-01)\n",
      "Epoch: [2][4050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1095e+00 (9.3392e-01)\n",
      "Epoch: [2][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0129e-01 (9.3382e-01)\n",
      "Epoch: [2][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3653e-01 (9.3430e-01)\n",
      "Epoch: [2][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6685e-01 (9.3460e-01)\n",
      "Epoch: [2][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1311e+00 (9.3480e-01)\n",
      "Epoch: [2][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0406e-01 (9.3494e-01)\n",
      "Epoch: [2][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9268e-01 (9.3524e-01)\n",
      "Epoch: [2][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8976e-01 (9.3537e-01)\n",
      "Epoch: [2][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0647e+00 (9.3547e-01)\n",
      "Epoch: [2][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4142e-01 (9.3585e-01)\n",
      "Epoch: [2][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1616e-01 (9.3624e-01)\n",
      "Epoch: [2][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6257e-01 (9.3639e-01)\n",
      "Epoch: [2][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1331e+00 (9.3695e-01)\n",
      "Epoch: [2][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0499e+00 (9.3701e-01)\n",
      "Epoch: [2][4750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5070e-01 (9.3751e-01)\n",
      "Epoch: [2][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8862e-01 (9.3779e-01)\n",
      "Epoch: [2][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4702e-01 (9.3801e-01)\n",
      "Epoch: [2][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0189e+00 (9.3843e-01)\n",
      "Epoch: [2][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4867e-01 (9.3865e-01)\n",
      "Epoch: [2][5000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7412e-01 (9.3885e-01)\n",
      "Test: [  0/196]\tTime  3.460 ( 3.460)\tLoss 6.2945e-01 (6.2945e-01)\tAcc@1  82.03 ( 82.03)\tAcc@5  94.92 ( 94.92)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.493)\tLoss 5.9927e-01 (8.7410e-01)\tAcc@1  83.20 ( 76.71)\tAcc@5  96.09 ( 93.96)\n",
      "Test: [100/196]\tTime  0.434 ( 0.464)\tLoss 1.4469e+00 (1.0020e+00)\tAcc@1  62.89 ( 73.93)\tAcc@5  87.11 ( 92.55)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.2950e+00 (1.1097e+00)\tAcc@1  70.70 ( 71.93)\tAcc@5  87.89 ( 91.07)\n",
      "epoch 2 0.9388358168750356 70.95999908447266 0.0085 4688969 0.19999437845311407\n",
      "Epoch: [3][   0/5005]\tTime  3.062 ( 3.062)\tData  2.425 ( 2.425)\tLoss 9.8522e-01 (9.8522e-01)\n",
      "Epoch: [3][  50/5005]\tTime  0.638 ( 0.685)\tData  0.000 ( 0.048)\tLoss 8.5288e-01 (9.1445e-01)\n",
      "Epoch: [3][ 100/5005]\tTime  0.637 ( 0.661)\tData  0.000 ( 0.024)\tLoss 8.5850e-01 (9.1170e-01)\n",
      "Epoch: [3][ 150/5005]\tTime  0.637 ( 0.653)\tData  0.000 ( 0.016)\tLoss 7.6452e-01 (8.9877e-01)\n",
      "Epoch: [3][ 200/5005]\tTime  0.638 ( 0.649)\tData  0.000 ( 0.012)\tLoss 9.5081e-01 (8.9655e-01)\n",
      "Epoch: [3][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 8.0897e-01 (8.9871e-01)\n",
      "Epoch: [3][ 300/5005]\tTime  0.638 ( 0.645)\tData  0.000 ( 0.008)\tLoss 9.1017e-01 (9.0088e-01)\n",
      "Epoch: [3][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 1.0325e+00 (9.0232e-01)\n",
      "Epoch: [3][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 9.4834e-01 (9.0580e-01)\n",
      "Epoch: [3][ 450/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.8466e-01 (9.0382e-01)\n",
      "Epoch: [3][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.5829e-01 (9.0493e-01)\n",
      "Epoch: [3][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.0124e-01 (9.0392e-01)\n",
      "Epoch: [3][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0165e+00 (9.0430e-01)\n",
      "Epoch: [3][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0704e+00 (9.0511e-01)\n",
      "Epoch: [3][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.2662e-01 (9.0640e-01)\n",
      "Epoch: [3][ 750/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.1909e-01 (9.0733e-01)\n",
      "Epoch: [3][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0188e+00 (9.0728e-01)\n",
      "Epoch: [3][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.2117e-01 (9.0775e-01)\n",
      "Epoch: [3][ 900/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.5918e-01 (9.0919e-01)\n",
      "Epoch: [3][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.5323e-01 (9.0886e-01)\n",
      "Epoch: [3][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.4872e-01 (9.0882e-01)\n",
      "Epoch: [3][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0259e+00 (9.0972e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [3][1100/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7582e-01 (9.0950e-01)\n",
      "Epoch: [3][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1824e-01 (9.1027e-01)\n",
      "Epoch: [3][1200/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3834e-01 (9.1020e-01)\n",
      "Epoch: [3][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8642e-01 (9.1207e-01)\n",
      "Epoch: [3][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.4246e-01 (9.1241e-01)\n",
      "Epoch: [3][1350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3500e-01 (9.1280e-01)\n",
      "Epoch: [3][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3417e-01 (9.1317e-01)\n",
      "Epoch: [3][1450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1095e-01 (9.1413e-01)\n",
      "Epoch: [3][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.1552e-01 (9.1503e-01)\n",
      "Epoch: [3][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3601e-01 (9.1503e-01)\n",
      "Epoch: [3][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1997e-01 (9.1508e-01)\n",
      "Epoch: [3][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.7782e-01 (9.1524e-01)\n",
      "Epoch: [3][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.5287e-01 (9.1548e-01)\n",
      "Epoch: [3][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5137e-01 (9.1613e-01)\n",
      "Epoch: [3][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6389e-01 (9.1611e-01)\n",
      "Epoch: [3][1850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 9.2117e-01 (9.1576e-01)\n",
      "Epoch: [3][1900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3187e-01 (9.1550e-01)\n",
      "Epoch: [3][1950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9087e-01 (9.1560e-01)\n",
      "Epoch: [3][2000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0303e+00 (9.1614e-01)\n",
      "Epoch: [3][2050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0208e+00 (9.1677e-01)\n",
      "Epoch: [3][2100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8210e-01 (9.1683e-01)\n",
      "Epoch: [3][2150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3302e-01 (9.1684e-01)\n",
      "Epoch: [3][2200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0867e+00 (9.1713e-01)\n",
      "Epoch: [3][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6595e-01 (9.1700e-01)\n",
      "Epoch: [3][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9219e-01 (9.1754e-01)\n",
      "Epoch: [3][2350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0256e+00 (9.1772e-01)\n",
      "Epoch: [3][2400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0343e+00 (9.1811e-01)\n",
      "Epoch: [3][2450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8780e-01 (9.1917e-01)\n",
      "Epoch: [3][2500/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5019e-01 (9.1990e-01)\n",
      "Epoch: [3][2550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0578e-01 (9.1998e-01)\n",
      "Epoch: [3][2600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5991e-01 (9.1991e-01)\n",
      "Epoch: [3][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8739e-01 (9.2030e-01)\n",
      "Epoch: [3][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8703e-01 (9.2075e-01)\n",
      "Epoch: [3][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0755e-01 (9.2106e-01)\n",
      "Epoch: [3][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2136e-01 (9.2112e-01)\n",
      "Epoch: [3][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7305e-01 (9.2082e-01)\n",
      "Epoch: [3][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5359e-01 (9.2094e-01)\n",
      "Epoch: [3][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0341e+00 (9.2129e-01)\n",
      "Epoch: [3][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4607e-01 (9.2167e-01)\n",
      "Epoch: [3][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0255e-01 (9.2176e-01)\n",
      "Epoch: [3][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0287e+00 (9.2214e-01)\n",
      "Epoch: [3][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7901e-01 (9.2270e-01)\n",
      "Epoch: [3][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0346e+00 (9.2287e-01)\n",
      "Epoch: [3][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6685e-01 (9.2325e-01)\n",
      "Epoch: [3][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9385e-01 (9.2347e-01)\n",
      "Epoch: [3][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0232e+00 (9.2299e-01)\n",
      "Epoch: [3][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0258e-01 (9.2302e-01)\n",
      "Epoch: [3][3450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4318e-01 (9.2313e-01)\n",
      "Epoch: [3][3500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0856e+00 (9.2322e-01)\n",
      "Epoch: [3][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3091e-01 (9.2349e-01)\n",
      "Epoch: [3][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6437e-01 (9.2331e-01)\n",
      "Epoch: [3][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6714e-01 (9.2315e-01)\n",
      "Epoch: [3][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1167e+00 (9.2374e-01)\n",
      "Epoch: [3][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9483e-01 (9.2391e-01)\n",
      "Epoch: [3][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8623e-01 (9.2417e-01)\n",
      "Epoch: [3][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1502e+00 (9.2423e-01)\n",
      "Epoch: [3][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2051e-01 (9.2439e-01)\n",
      "Epoch: [3][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8308e-01 (9.2427e-01)\n",
      "Epoch: [3][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0694e+00 (9.2440e-01)\n",
      "Epoch: [3][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9643e-01 (9.2475e-01)\n",
      "Epoch: [3][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0219e+00 (9.2499e-01)\n",
      "Epoch: [3][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3332e-01 (9.2537e-01)\n",
      "Epoch: [3][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8189e-01 (9.2553e-01)\n",
      "Epoch: [3][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3638e-01 (9.2563e-01)\n",
      "Epoch: [3][4300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0388e+00 (9.2564e-01)\n",
      "Epoch: [3][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1805e+00 (9.2595e-01)\n",
      "Epoch: [3][4400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5715e-01 (9.2628e-01)\n",
      "Epoch: [3][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2738e-01 (9.2619e-01)\n",
      "Epoch: [3][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8285e-01 (9.2617e-01)\n",
      "Epoch: [3][4550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7479e-01 (9.2663e-01)\n",
      "Epoch: [3][4600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0009e+00 (9.2673e-01)\n",
      "Epoch: [3][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8058e-01 (9.2698e-01)\n",
      "Epoch: [3][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0944e+00 (9.2712e-01)\n",
      "Epoch: [3][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6207e-01 (9.2712e-01)\n",
      "Epoch: [3][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5733e-01 (9.2748e-01)\n",
      "Epoch: [3][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2495e-01 (9.2776e-01)\n",
      "Epoch: [3][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0764e+00 (9.2795e-01)\n",
      "Epoch: [3][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5334e-01 (9.2828e-01)\n",
      "Epoch: [3][5000/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0439e+00 (9.2865e-01)\n",
      "Test: [  0/196]\tTime  3.454 ( 3.454)\tLoss 7.0138e-01 (7.0138e-01)\tAcc@1  80.08 ( 80.08)\tAcc@5  95.31 ( 95.31)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.493)\tLoss 5.1883e-01 (8.2759e-01)\tAcc@1  87.11 ( 77.81)\tAcc@5  97.27 ( 94.46)\n",
      "Test: [100/196]\tTime  0.434 ( 0.464)\tLoss 1.4934e+00 (9.7168e-01)\tAcc@1  60.16 ( 74.73)\tAcc@5  86.33 ( 92.81)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.2022e+00 (1.0962e+00)\tAcc@1  72.27 ( 72.50)\tAcc@5  87.11 ( 91.06)\n",
      "epoch 3 0.9286563033536336 71.6199951171875 0.008000000000000002 4688969 0.19999437845311407\n",
      "Epoch: [4][   0/5005]\tTime  2.963 ( 2.963)\tData  2.321 ( 2.321)\tLoss 9.7952e-01 (9.7952e-01)\n",
      "Epoch: [4][  50/5005]\tTime  0.637 ( 0.684)\tData  0.000 ( 0.046)\tLoss 8.3236e-01 (8.9030e-01)\n",
      "Epoch: [4][ 100/5005]\tTime  0.638 ( 0.661)\tData  0.000 ( 0.023)\tLoss 9.4984e-01 (8.8552e-01)\n",
      "Epoch: [4][ 150/5005]\tTime  0.638 ( 0.653)\tData  0.000 ( 0.016)\tLoss 9.5454e-01 (8.8784e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [4][ 200/5005]\tTime  0.638 ( 0.649)\tData  0.000 ( 0.012)\tLoss 8.8369e-01 (8.8876e-01)\n",
      "Epoch: [4][ 250/5005]\tTime  0.638 ( 0.647)\tData  0.000 ( 0.009)\tLoss 8.8810e-01 (8.9360e-01)\n",
      "Epoch: [4][ 300/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 8.0356e-01 (8.8958e-01)\n",
      "Epoch: [4][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 9.7291e-01 (8.8962e-01)\n",
      "Epoch: [4][ 400/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.9406e-01 (8.8908e-01)\n",
      "Epoch: [4][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.005)\tLoss 8.3893e-01 (8.8852e-01)\n",
      "Epoch: [4][ 500/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.3253e-01 (8.8844e-01)\n",
      "Epoch: [4][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 9.3823e-01 (8.8976e-01)\n",
      "Epoch: [4][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.3140e-01 (8.9108e-01)\n",
      "Epoch: [4][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.1029e+00 (8.9349e-01)\n",
      "Epoch: [4][ 700/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.1288e-01 (8.9251e-01)\n",
      "Epoch: [4][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.0474e-01 (8.9302e-01)\n",
      "Epoch: [4][ 800/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.4050e-01 (8.9412e-01)\n",
      "Epoch: [4][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.4992e-01 (8.9610e-01)\n",
      "Epoch: [4][ 900/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.0581e-01 (8.9675e-01)\n",
      "Epoch: [4][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.9584e-01 (8.9769e-01)\n",
      "Epoch: [4][1000/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.4105e-01 (8.9883e-01)\n",
      "Epoch: [4][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.0015e+00 (8.9866e-01)\n",
      "Epoch: [4][1100/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.0481e-01 (8.9841e-01)\n",
      "Epoch: [4][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3081e-01 (8.9914e-01)\n",
      "Epoch: [4][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9449e-01 (8.9917e-01)\n",
      "Epoch: [4][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9845e-01 (8.9947e-01)\n",
      "Epoch: [4][1300/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1828e-01 (9.0014e-01)\n",
      "Epoch: [4][1350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4226e-01 (9.0127e-01)\n",
      "Epoch: [4][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9206e-01 (9.0214e-01)\n",
      "Epoch: [4][1450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0615e+00 (9.0271e-01)\n",
      "Epoch: [4][1500/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9416e-01 (9.0253e-01)\n",
      "Epoch: [4][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5361e-01 (9.0283e-01)\n",
      "Epoch: [4][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.9356e-01 (9.0315e-01)\n",
      "Epoch: [4][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0121e+00 (9.0309e-01)\n",
      "Epoch: [4][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0769e-01 (9.0364e-01)\n",
      "Epoch: [4][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6706e-01 (9.0303e-01)\n",
      "Epoch: [4][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.2491e-01 (9.0372e-01)\n",
      "Epoch: [4][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.6180e-01 (9.0445e-01)\n",
      "Epoch: [4][1900/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0367e+00 (9.0378e-01)\n",
      "Epoch: [4][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.3542e-01 (9.0400e-01)\n",
      "Epoch: [4][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.2046e+00 (9.0435e-01)\n",
      "Epoch: [4][2050/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0204e+00 (9.0486e-01)\n",
      "Epoch: [4][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.3502e-01 (9.0463e-01)\n",
      "Epoch: [4][2150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3229e-01 (9.0532e-01)\n",
      "Epoch: [4][2200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0282e+00 (9.0560e-01)\n",
      "Epoch: [4][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3959e-01 (9.0568e-01)\n",
      "Epoch: [4][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1867e-01 (9.0600e-01)\n",
      "Epoch: [4][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7835e-01 (9.0640e-01)\n",
      "Epoch: [4][2400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3178e-01 (9.0667e-01)\n",
      "Epoch: [4][2450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0336e+00 (9.0761e-01)\n",
      "Epoch: [4][2500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8555e-01 (9.0769e-01)\n",
      "Epoch: [4][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7233e-01 (9.0768e-01)\n",
      "Epoch: [4][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6367e-01 (9.0804e-01)\n",
      "Epoch: [4][2650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7810e-01 (9.0872e-01)\n",
      "Epoch: [4][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0590e-01 (9.0881e-01)\n",
      "Epoch: [4][2750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8980e-01 (9.0889e-01)\n",
      "Epoch: [4][2800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2722e-01 (9.0899e-01)\n",
      "Epoch: [4][2850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9246e-01 (9.0940e-01)\n",
      "Epoch: [4][2900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2569e-01 (9.0904e-01)\n",
      "Epoch: [4][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3603e-01 (9.0956e-01)\n",
      "Epoch: [4][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5742e-01 (9.0972e-01)\n",
      "Epoch: [4][3050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0444e+00 (9.1002e-01)\n",
      "Epoch: [4][3100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0906e-01 (9.1014e-01)\n",
      "Epoch: [4][3150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3550e-01 (9.1027e-01)\n",
      "Epoch: [4][3200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6933e-01 (9.1043e-01)\n",
      "Epoch: [4][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7593e-01 (9.1059e-01)\n",
      "Epoch: [4][3300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0791e+00 (9.1107e-01)\n",
      "Epoch: [4][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0328e+00 (9.1081e-01)\n",
      "Epoch: [4][3400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6361e-01 (9.1059e-01)\n",
      "Epoch: [4][3450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3910e-01 (9.1098e-01)\n",
      "Epoch: [4][3500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3754e-01 (9.1099e-01)\n",
      "Epoch: [4][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8510e-01 (9.1123e-01)\n",
      "Epoch: [4][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0476e+00 (9.1144e-01)\n",
      "Epoch: [4][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8125e-01 (9.1125e-01)\n",
      "Epoch: [4][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4091e-01 (9.1160e-01)\n",
      "Epoch: [4][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0928e+00 (9.1176e-01)\n",
      "Epoch: [4][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9794e-01 (9.1155e-01)\n",
      "Epoch: [4][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7523e-01 (9.1181e-01)\n",
      "Epoch: [4][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2101e-01 (9.1212e-01)\n",
      "Epoch: [4][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0047e+00 (9.1237e-01)\n",
      "Epoch: [4][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6339e-01 (9.1271e-01)\n",
      "Epoch: [4][4050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7285e-01 (9.1297e-01)\n",
      "Epoch: [4][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0764e-01 (9.1300e-01)\n",
      "Epoch: [4][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1378e+00 (9.1342e-01)\n",
      "Epoch: [4][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4749e-01 (9.1359e-01)\n",
      "Epoch: [4][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9246e-01 (9.1381e-01)\n",
      "Epoch: [4][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6986e-01 (9.1391e-01)\n",
      "Epoch: [4][4350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0061e-01 (9.1413e-01)\n",
      "Epoch: [4][4400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0284e-01 (9.1440e-01)\n",
      "Epoch: [4][4450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0255e+00 (9.1460e-01)\n",
      "Epoch: [4][4500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3865e-01 (9.1503e-01)\n",
      "Epoch: [4][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3352e-01 (9.1531e-01)\n",
      "Epoch: [4][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9568e-01 (9.1563e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [4][4650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4641e-01 (9.1590e-01)\n",
      "Epoch: [4][4700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6151e-01 (9.1615e-01)\n",
      "Epoch: [4][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9285e-01 (9.1625e-01)\n",
      "Epoch: [4][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0336e+00 (9.1669e-01)\n",
      "Epoch: [4][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8146e-01 (9.1679e-01)\n",
      "Epoch: [4][4900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7186e-01 (9.1689e-01)\n",
      "Epoch: [4][4950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2179e-01 (9.1690e-01)\n",
      "Epoch: [4][5000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1062e+00 (9.1722e-01)\n",
      "Test: [  0/196]\tTime  3.458 ( 3.458)\tLoss 6.1560e-01 (6.1560e-01)\tAcc@1  83.20 ( 83.20)\tAcc@5  96.48 ( 96.48)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.493)\tLoss 6.0114e-01 (7.9652e-01)\tAcc@1  85.94 ( 78.36)\tAcc@5  96.09 ( 94.89)\n",
      "Test: [100/196]\tTime  0.434 ( 0.464)\tLoss 1.2641e+00 (9.3405e-01)\tAcc@1  65.62 ( 75.44)\tAcc@5  89.45 ( 93.41)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.0837e+00 (1.0570e+00)\tAcc@1  75.78 ( 73.05)\tAcc@5  89.84 ( 91.74)\n",
      "epoch 4 0.9172601720283717 72.0459976196289 0.0075000000000000015 4688969 0.19999437845311407\n",
      "Epoch: [5][   0/5005]\tTime  3.049 ( 3.049)\tData  2.411 ( 2.411)\tLoss 8.7876e-01 (8.7876e-01)\n",
      "Epoch: [5][  50/5005]\tTime  0.637 ( 0.685)\tData  0.000 ( 0.048)\tLoss 7.7613e-01 (8.7001e-01)\n",
      "Epoch: [5][ 100/5005]\tTime  0.638 ( 0.662)\tData  0.000 ( 0.024)\tLoss 8.6255e-01 (8.7610e-01)\n",
      "Epoch: [5][ 150/5005]\tTime  0.639 ( 0.654)\tData  0.000 ( 0.016)\tLoss 8.4038e-01 (8.7183e-01)\n",
      "Epoch: [5][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.012)\tLoss 9.4438e-01 (8.7229e-01)\n",
      "Epoch: [5][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 8.8900e-01 (8.7765e-01)\n",
      "Epoch: [5][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.008)\tLoss 1.0085e+00 (8.7886e-01)\n",
      "Epoch: [5][ 350/5005]\tTime  0.638 ( 0.645)\tData  0.000 ( 0.007)\tLoss 7.6803e-01 (8.8129e-01)\n",
      "Epoch: [5][ 400/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.006)\tLoss 9.4808e-01 (8.8221e-01)\n",
      "Epoch: [5][ 450/5005]\tTime  0.639 ( 0.643)\tData  0.000 ( 0.006)\tLoss 9.8768e-01 (8.8074e-01)\n",
      "Epoch: [5][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.0343e+00 (8.8075e-01)\n",
      "Epoch: [5][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.7505e-01 (8.8360e-01)\n",
      "Epoch: [5][ 600/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 6.3271e-01 (8.8334e-01)\n",
      "Epoch: [5][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0149e+00 (8.8342e-01)\n",
      "Epoch: [5][ 700/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0174e+00 (8.8364e-01)\n",
      "Epoch: [5][ 750/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.003)\tLoss 9.7690e-01 (8.8308e-01)\n",
      "Epoch: [5][ 800/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.9024e-01 (8.8492e-01)\n",
      "Epoch: [5][ 850/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.0069e-01 (8.8580e-01)\n",
      "Epoch: [5][ 900/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.2186e-01 (8.8561e-01)\n",
      "Epoch: [5][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0538e+00 (8.8620e-01)\n",
      "Epoch: [5][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.4079e-01 (8.8740e-01)\n",
      "Epoch: [5][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6405e-01 (8.8783e-01)\n",
      "Epoch: [5][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 7.9002e-01 (8.8834e-01)\n",
      "Epoch: [5][1150/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.7149e-01 (8.8843e-01)\n",
      "Epoch: [5][1200/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 1.0691e+00 (8.8878e-01)\n",
      "Epoch: [5][1250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5494e-01 (8.8924e-01)\n",
      "Epoch: [5][1300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3154e-01 (8.9062e-01)\n",
      "Epoch: [5][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.8648e-01 (8.9151e-01)\n",
      "Epoch: [5][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0444e-01 (8.9204e-01)\n",
      "Epoch: [5][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1616e-01 (8.9146e-01)\n",
      "Epoch: [5][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7605e-01 (8.9228e-01)\n",
      "Epoch: [5][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.1252e-01 (8.9250e-01)\n",
      "Epoch: [5][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9441e-01 (8.9256e-01)\n",
      "Epoch: [5][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2720e-01 (8.9269e-01)\n",
      "Epoch: [5][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7680e-01 (8.9256e-01)\n",
      "Epoch: [5][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1071e-01 (8.9330e-01)\n",
      "Epoch: [5][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0555e+00 (8.9366e-01)\n",
      "Epoch: [5][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.9468e-01 (8.9408e-01)\n",
      "Epoch: [5][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0308e+00 (8.9449e-01)\n",
      "Epoch: [5][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.1959e-01 (8.9498e-01)\n",
      "Epoch: [5][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.7740e-01 (8.9499e-01)\n",
      "Epoch: [5][2050/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.6474e-01 (8.9518e-01)\n",
      "Epoch: [5][2100/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.0260e-01 (8.9565e-01)\n",
      "Epoch: [5][2150/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.1553e-01 (8.9590e-01)\n",
      "Epoch: [5][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.1862e-01 (8.9603e-01)\n",
      "Epoch: [5][2250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.9183e-01 (8.9598e-01)\n",
      "Epoch: [5][2300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.3385e-01 (8.9599e-01)\n",
      "Epoch: [5][2350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.8462e-01 (8.9636e-01)\n",
      "Epoch: [5][2400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0101e+00 (8.9668e-01)\n",
      "Epoch: [5][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0185e+00 (8.9655e-01)\n",
      "Epoch: [5][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5184e-01 (8.9647e-01)\n",
      "Epoch: [5][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2899e-01 (8.9722e-01)\n",
      "Epoch: [5][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8451e-01 (8.9714e-01)\n",
      "Epoch: [5][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0752e-01 (8.9705e-01)\n",
      "Epoch: [5][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0364e+00 (8.9732e-01)\n",
      "Epoch: [5][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2257e-01 (8.9743e-01)\n",
      "Epoch: [5][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8245e-01 (8.9778e-01)\n",
      "Epoch: [5][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5749e-01 (8.9790e-01)\n",
      "Epoch: [5][2900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2922e-01 (8.9787e-01)\n",
      "Epoch: [5][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6214e-01 (8.9826e-01)\n",
      "Epoch: [5][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0477e+00 (8.9862e-01)\n",
      "Epoch: [5][3050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1457e-01 (8.9846e-01)\n",
      "Epoch: [5][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9544e-01 (8.9856e-01)\n",
      "Epoch: [5][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0757e-01 (8.9855e-01)\n",
      "Epoch: [5][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1014e-01 (8.9887e-01)\n",
      "Epoch: [5][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0032e+00 (8.9888e-01)\n",
      "Epoch: [5][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6638e-01 (8.9926e-01)\n",
      "Epoch: [5][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1126e+00 (8.9927e-01)\n",
      "Epoch: [5][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2851e-01 (8.9936e-01)\n",
      "Epoch: [5][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6883e-01 (8.9973e-01)\n",
      "Epoch: [5][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1042e+00 (9.0017e-01)\n",
      "Epoch: [5][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3943e-01 (9.0049e-01)\n",
      "Epoch: [5][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1458e+00 (9.0065e-01)\n",
      "Epoch: [5][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0551e+00 (9.0046e-01)\n",
      "Epoch: [5][3700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1131e+00 (9.0093e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [5][3750/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1003e-01 (9.0097e-01)\n",
      "Epoch: [5][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4870e-01 (9.0128e-01)\n",
      "Epoch: [5][3850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5548e-01 (9.0146e-01)\n",
      "Epoch: [5][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0690e-01 (9.0146e-01)\n",
      "Epoch: [5][3950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8532e-01 (9.0134e-01)\n",
      "Epoch: [5][4000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1261e+00 (9.0185e-01)\n",
      "Epoch: [5][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6820e-01 (9.0223e-01)\n",
      "Epoch: [5][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0206e+00 (9.0248e-01)\n",
      "Epoch: [5][4150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6634e-01 (9.0240e-01)\n",
      "Epoch: [5][4200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8007e-01 (9.0246e-01)\n",
      "Epoch: [5][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8827e-01 (9.0275e-01)\n",
      "Epoch: [5][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.2342e+00 (9.0291e-01)\n",
      "Epoch: [5][4350/5005]\tTime  0.635 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0829e-01 (9.0274e-01)\n",
      "Epoch: [5][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1958e+00 (9.0314e-01)\n",
      "Epoch: [5][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2793e-01 (9.0332e-01)\n",
      "Epoch: [5][4500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5883e-01 (9.0311e-01)\n",
      "Epoch: [5][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2262e-01 (9.0344e-01)\n",
      "Epoch: [5][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0068e-01 (9.0356e-01)\n",
      "Epoch: [5][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9118e-01 (9.0374e-01)\n",
      "Epoch: [5][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7822e-01 (9.0402e-01)\n",
      "Epoch: [5][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9981e-01 (9.0406e-01)\n",
      "Epoch: [5][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4456e-01 (9.0425e-01)\n",
      "Epoch: [5][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5849e-01 (9.0434e-01)\n",
      "Epoch: [5][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0658e-01 (9.0460e-01)\n",
      "Epoch: [5][4950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0848e+00 (9.0469e-01)\n",
      "Epoch: [5][5000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5858e-01 (9.0487e-01)\n",
      "Test: [  0/196]\tTime  3.603 ( 3.603)\tLoss 6.2388e-01 (6.2388e-01)\tAcc@1  83.20 ( 83.20)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.496)\tLoss 4.5533e-01 (7.9252e-01)\tAcc@1  88.28 ( 79.12)\tAcc@5  97.66 ( 94.69)\n",
      "Test: [100/196]\tTime  0.434 ( 0.465)\tLoss 1.3732e+00 (9.2615e-01)\tAcc@1  61.72 ( 75.87)\tAcc@5  88.28 ( 93.25)\n",
      "Test: [150/196]\tTime  0.435 ( 0.455)\tLoss 1.1740e+00 (1.0467e+00)\tAcc@1  73.83 ( 73.39)\tAcc@5  87.89 ( 91.66)\n",
      "epoch 5 0.9048650934320421 72.31199645996094 0.007000000000000001 4688969 0.19999437845311407\n",
      "Epoch: [6][   0/5005]\tTime  3.265 ( 3.265)\tData  2.628 ( 2.628)\tLoss 7.5257e-01 (7.5257e-01)\n",
      "Epoch: [6][  50/5005]\tTime  0.637 ( 0.689)\tData  0.000 ( 0.052)\tLoss 1.0942e+00 (8.7794e-01)\n",
      "Epoch: [6][ 100/5005]\tTime  0.638 ( 0.664)\tData  0.000 ( 0.026)\tLoss 9.8324e-01 (8.9257e-01)\n",
      "Epoch: [6][ 150/5005]\tTime  0.637 ( 0.655)\tData  0.000 ( 0.018)\tLoss 8.3471e-01 (8.8130e-01)\n",
      "Epoch: [6][ 200/5005]\tTime  0.637 ( 0.651)\tData  0.000 ( 0.013)\tLoss 8.7947e-01 (8.7676e-01)\n",
      "Epoch: [6][ 250/5005]\tTime  0.638 ( 0.648)\tData  0.000 ( 0.011)\tLoss 8.1288e-01 (8.7431e-01)\n",
      "Epoch: [6][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.009)\tLoss 7.3292e-01 (8.7234e-01)\n",
      "Epoch: [6][ 350/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 8.9553e-01 (8.7165e-01)\n",
      "Epoch: [6][ 400/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.007)\tLoss 8.5977e-01 (8.7370e-01)\n",
      "Epoch: [6][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.2550e-01 (8.7513e-01)\n",
      "Epoch: [6][ 500/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.005)\tLoss 8.1298e-01 (8.7430e-01)\n",
      "Epoch: [6][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.5787e-01 (8.7476e-01)\n",
      "Epoch: [6][ 600/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.9685e-01 (8.7402e-01)\n",
      "Epoch: [6][ 650/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 9.2877e-01 (8.7400e-01)\n",
      "Epoch: [6][ 700/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.5039e-01 (8.7530e-01)\n",
      "Epoch: [6][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.2309e-01 (8.7522e-01)\n",
      "Epoch: [6][ 800/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 1.0390e+00 (8.7561e-01)\n",
      "Epoch: [6][ 850/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.1932e-01 (8.7658e-01)\n",
      "Epoch: [6][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.7352e-01 (8.7635e-01)\n",
      "Epoch: [6][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.0380e-01 (8.7572e-01)\n",
      "Epoch: [6][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.1880e-01 (8.7558e-01)\n",
      "Epoch: [6][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6412e-01 (8.7715e-01)\n",
      "Epoch: [6][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.6806e-01 (8.7775e-01)\n",
      "Epoch: [6][1150/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.4012e-01 (8.7807e-01)\n",
      "Epoch: [6][1200/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.9230e-01 (8.7782e-01)\n",
      "Epoch: [6][1250/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 7.8733e-01 (8.7867e-01)\n",
      "Epoch: [6][1300/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 7.2003e-01 (8.7900e-01)\n",
      "Epoch: [6][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0225e-01 (8.7967e-01)\n",
      "Epoch: [6][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2131e-01 (8.7968e-01)\n",
      "Epoch: [6][1450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9922e-01 (8.7996e-01)\n",
      "Epoch: [6][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4727e-01 (8.8057e-01)\n",
      "Epoch: [6][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9774e-01 (8.8168e-01)\n",
      "Epoch: [6][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.4666e-01 (8.8222e-01)\n",
      "Epoch: [6][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.7438e-01 (8.8274e-01)\n",
      "Epoch: [6][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6274e-01 (8.8315e-01)\n",
      "Epoch: [6][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7895e-01 (8.8268e-01)\n",
      "Epoch: [6][1800/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7512e-01 (8.8323e-01)\n",
      "Epoch: [6][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.9899e-01 (8.8312e-01)\n",
      "Epoch: [6][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1283e-01 (8.8304e-01)\n",
      "Epoch: [6][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.4727e-01 (8.8360e-01)\n",
      "Epoch: [6][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0698e-01 (8.8405e-01)\n",
      "Epoch: [6][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5538e-01 (8.8393e-01)\n",
      "Epoch: [6][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.7123e-01 (8.8473e-01)\n",
      "Epoch: [6][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0253e+00 (8.8506e-01)\n",
      "Epoch: [6][2200/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.8760e-01 (8.8567e-01)\n",
      "Epoch: [6][2250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.7514e-01 (8.8612e-01)\n",
      "Epoch: [6][2300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.6430e-01 (8.8626e-01)\n",
      "Epoch: [6][2350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.7737e-01 (8.8645e-01)\n",
      "Epoch: [6][2400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.4841e-01 (8.8634e-01)\n",
      "Epoch: [6][2450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.2316e+00 (8.8658e-01)\n",
      "Epoch: [6][2500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.6948e-01 (8.8687e-01)\n",
      "Epoch: [6][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3342e-01 (8.8666e-01)\n",
      "Epoch: [6][2600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5773e-01 (8.8665e-01)\n",
      "Epoch: [6][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0116e+00 (8.8690e-01)\n",
      "Epoch: [6][2700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6327e-01 (8.8728e-01)\n",
      "Epoch: [6][2750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4597e-01 (8.8744e-01)\n",
      "Epoch: [6][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0698e-01 (8.8750e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [6][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0037e-01 (8.8789e-01)\n",
      "Epoch: [6][2900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6868e-01 (8.8791e-01)\n",
      "Epoch: [6][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0967e+00 (8.8804e-01)\n",
      "Epoch: [6][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4886e-01 (8.8817e-01)\n",
      "Epoch: [6][3050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0245e+00 (8.8826e-01)\n",
      "Epoch: [6][3100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6061e-01 (8.8856e-01)\n",
      "Epoch: [6][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0847e-01 (8.8885e-01)\n",
      "Epoch: [6][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5396e-01 (8.8895e-01)\n",
      "Epoch: [6][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1176e-01 (8.8927e-01)\n",
      "Epoch: [6][3300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3716e-01 (8.8962e-01)\n",
      "Epoch: [6][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9913e-01 (8.8930e-01)\n",
      "Epoch: [6][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1402e+00 (8.8946e-01)\n",
      "Epoch: [6][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4961e-01 (8.8954e-01)\n",
      "Epoch: [6][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0342e-01 (8.8998e-01)\n",
      "Epoch: [6][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3563e-01 (8.9034e-01)\n",
      "Epoch: [6][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5743e-01 (8.9070e-01)\n",
      "Epoch: [6][3650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0942e+00 (8.9035e-01)\n",
      "Epoch: [6][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9442e-01 (8.9084e-01)\n",
      "Epoch: [6][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7269e-01 (8.9117e-01)\n",
      "Epoch: [6][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3086e-01 (8.9161e-01)\n",
      "Epoch: [6][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0481e+00 (8.9195e-01)\n",
      "Epoch: [6][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1461e-01 (8.9191e-01)\n",
      "Epoch: [6][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0026e+00 (8.9206e-01)\n",
      "Epoch: [6][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6705e-01 (8.9251e-01)\n",
      "Epoch: [6][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7704e-01 (8.9282e-01)\n",
      "Epoch: [6][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1792e-01 (8.9274e-01)\n",
      "Epoch: [6][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8411e-01 (8.9306e-01)\n",
      "Epoch: [6][4200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2967e-01 (8.9300e-01)\n",
      "Epoch: [6][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2741e-01 (8.9324e-01)\n",
      "Epoch: [6][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2249e-01 (8.9322e-01)\n",
      "Epoch: [6][4350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7808e-01 (8.9310e-01)\n",
      "Epoch: [6][4400/5005]\tTime  0.643 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5390e-01 (8.9320e-01)\n",
      "Epoch: [6][4450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4015e-01 (8.9346e-01)\n",
      "Epoch: [6][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7525e-01 (8.9370e-01)\n",
      "Epoch: [6][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7046e-01 (8.9405e-01)\n",
      "Epoch: [6][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0288e+00 (8.9427e-01)\n",
      "Epoch: [6][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8855e-01 (8.9451e-01)\n",
      "Epoch: [6][4700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1199e+00 (8.9507e-01)\n",
      "Epoch: [6][4750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0497e+00 (8.9533e-01)\n",
      "Epoch: [6][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8064e-01 (8.9554e-01)\n",
      "Epoch: [6][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2588e-01 (8.9565e-01)\n",
      "Epoch: [6][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5900e-01 (8.9564e-01)\n",
      "Epoch: [6][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6199e-01 (8.9568e-01)\n",
      "Epoch: [6][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7183e-01 (8.9567e-01)\n",
      "Test: [  0/196]\tTime  3.609 ( 3.609)\tLoss 5.4273e-01 (5.4273e-01)\tAcc@1  85.16 ( 85.16)\tAcc@5  97.66 ( 97.66)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.496)\tLoss 5.1556e-01 (7.9926e-01)\tAcc@1  85.55 ( 78.52)\tAcc@5  96.48 ( 94.83)\n",
      "Test: [100/196]\tTime  0.434 ( 0.465)\tLoss 1.1555e+00 (9.1880e-01)\tAcc@1  71.09 ( 76.03)\tAcc@5  89.84 ( 93.47)\n",
      "Test: [150/196]\tTime  0.434 ( 0.455)\tLoss 1.1689e+00 (1.0375e+00)\tAcc@1  75.39 ( 73.85)\tAcc@5  89.06 ( 91.95)\n",
      "epoch 6 0.89571102848151 72.76599884033203 0.006500000000000002 4688969 0.19999437845311407\n",
      "Epoch: [7][   0/5005]\tTime  3.339 ( 3.339)\tData  2.701 ( 2.701)\tLoss 9.8629e-01 (9.8629e-01)\n",
      "Epoch: [7][  50/5005]\tTime  0.639 ( 0.691)\tData  0.000 ( 0.053)\tLoss 9.3068e-01 (8.5613e-01)\n",
      "Epoch: [7][ 100/5005]\tTime  0.638 ( 0.665)\tData  0.000 ( 0.027)\tLoss 8.4089e-01 (8.6644e-01)\n",
      "Epoch: [7][ 150/5005]\tTime  0.637 ( 0.656)\tData  0.000 ( 0.018)\tLoss 8.1399e-01 (8.6442e-01)\n",
      "Epoch: [7][ 200/5005]\tTime  0.637 ( 0.651)\tData  0.000 ( 0.014)\tLoss 7.8397e-01 (8.6485e-01)\n",
      "Epoch: [7][ 250/5005]\tTime  0.637 ( 0.648)\tData  0.000 ( 0.011)\tLoss 7.4979e-01 (8.5903e-01)\n",
      "Epoch: [7][ 300/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.009)\tLoss 8.9645e-01 (8.6130e-01)\n",
      "Epoch: [7][ 350/5005]\tTime  0.639 ( 0.645)\tData  0.000 ( 0.008)\tLoss 1.0110e+00 (8.6300e-01)\n",
      "Epoch: [7][ 400/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.007)\tLoss 7.7613e-01 (8.6469e-01)\n",
      "Epoch: [7][ 450/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.006)\tLoss 8.5219e-01 (8.6568e-01)\n",
      "Epoch: [7][ 500/5005]\tTime  0.639 ( 0.643)\tData  0.000 ( 0.006)\tLoss 6.6837e-01 (8.6463e-01)\n",
      "Epoch: [7][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 1.0441e+00 (8.6737e-01)\n",
      "Epoch: [7][ 600/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.7368e-01 (8.6741e-01)\n",
      "Epoch: [7][ 650/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 8.4729e-01 (8.6885e-01)\n",
      "Epoch: [7][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.7505e-01 (8.6865e-01)\n",
      "Epoch: [7][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.6644e-01 (8.6785e-01)\n",
      "Epoch: [7][ 800/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.0284e-01 (8.6710e-01)\n",
      "Epoch: [7][ 850/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.6369e-01 (8.6554e-01)\n",
      "Epoch: [7][ 900/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0531e+00 (8.6643e-01)\n",
      "Epoch: [7][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.0148e-01 (8.6584e-01)\n",
      "Epoch: [7][1000/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6975e-01 (8.6584e-01)\n",
      "Epoch: [7][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0013e+00 (8.6567e-01)\n",
      "Epoch: [7][1100/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.0856e-01 (8.6624e-01)\n",
      "Epoch: [7][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6339e-01 (8.6663e-01)\n",
      "Epoch: [7][1200/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.6670e-01 (8.6762e-01)\n",
      "Epoch: [7][1250/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.8389e-01 (8.6817e-01)\n",
      "Epoch: [7][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0618e-01 (8.6881e-01)\n",
      "Epoch: [7][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.9065e-01 (8.6863e-01)\n",
      "Epoch: [7][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0585e+00 (8.6909e-01)\n",
      "Epoch: [7][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0543e+00 (8.6983e-01)\n",
      "Epoch: [7][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1747e-01 (8.7040e-01)\n",
      "Epoch: [7][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.8311e-01 (8.7062e-01)\n",
      "Epoch: [7][1600/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.3995e-01 (8.7088e-01)\n",
      "Epoch: [7][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.6227e-01 (8.7151e-01)\n",
      "Epoch: [7][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9848e-01 (8.7127e-01)\n",
      "Epoch: [7][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3525e-01 (8.7193e-01)\n",
      "Epoch: [7][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.6585e-01 (8.7231e-01)\n",
      "Epoch: [7][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3601e-01 (8.7241e-01)\n",
      "Epoch: [7][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6958e-01 (8.7277e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [7][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1964e-01 (8.7265e-01)\n",
      "Epoch: [7][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9517e-01 (8.7280e-01)\n",
      "Epoch: [7][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.8487e-01 (8.7214e-01)\n",
      "Epoch: [7][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.3880e-01 (8.7194e-01)\n",
      "Epoch: [7][2150/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.4441e-01 (8.7194e-01)\n",
      "Epoch: [7][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0474e+00 (8.7251e-01)\n",
      "Epoch: [7][2250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.0664e-01 (8.7243e-01)\n",
      "Epoch: [7][2300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.8646e-01 (8.7333e-01)\n",
      "Epoch: [7][2350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.7032e-01 (8.7282e-01)\n",
      "Epoch: [7][2400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.0584e-01 (8.7241e-01)\n",
      "Epoch: [7][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3393e-01 (8.7299e-01)\n",
      "Epoch: [7][2500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3862e-01 (8.7344e-01)\n",
      "Epoch: [7][2550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7252e-01 (8.7371e-01)\n",
      "Epoch: [7][2600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5939e-01 (8.7424e-01)\n",
      "Epoch: [7][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4471e-01 (8.7446e-01)\n",
      "Epoch: [7][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0825e-01 (8.7439e-01)\n",
      "Epoch: [7][2750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8141e-01 (8.7509e-01)\n",
      "Epoch: [7][2800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1581e-01 (8.7562e-01)\n",
      "Epoch: [7][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4970e-01 (8.7583e-01)\n",
      "Epoch: [7][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0877e+00 (8.7634e-01)\n",
      "Epoch: [7][2950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5469e-01 (8.7651e-01)\n",
      "Epoch: [7][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3376e-01 (8.7666e-01)\n",
      "Epoch: [7][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5877e-01 (8.7699e-01)\n",
      "Epoch: [7][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1526e-01 (8.7704e-01)\n",
      "Epoch: [7][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8653e-01 (8.7715e-01)\n",
      "Epoch: [7][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5726e-01 (8.7729e-01)\n",
      "Epoch: [7][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4424e-01 (8.7727e-01)\n",
      "Epoch: [7][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8446e-01 (8.7732e-01)\n",
      "Epoch: [7][3350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0703e-01 (8.7749e-01)\n",
      "Epoch: [7][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0989e+00 (8.7756e-01)\n",
      "Epoch: [7][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5232e-01 (8.7733e-01)\n",
      "Epoch: [7][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8730e-01 (8.7745e-01)\n",
      "Epoch: [7][3550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8662e-01 (8.7800e-01)\n",
      "Epoch: [7][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0043e+00 (8.7825e-01)\n",
      "Epoch: [7][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7209e-01 (8.7831e-01)\n",
      "Epoch: [7][3700/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1995e-01 (8.7869e-01)\n",
      "Epoch: [7][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0361e-01 (8.7881e-01)\n",
      "Epoch: [7][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7811e-01 (8.7933e-01)\n",
      "Epoch: [7][3850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9478e-01 (8.7942e-01)\n",
      "Epoch: [7][3900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4262e-01 (8.7983e-01)\n",
      "Epoch: [7][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0438e-01 (8.7994e-01)\n",
      "Epoch: [7][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9922e-01 (8.8023e-01)\n",
      "Epoch: [7][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4491e-01 (8.8058e-01)\n",
      "Epoch: [7][4100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0157e-01 (8.8051e-01)\n",
      "Epoch: [7][4150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6314e-01 (8.8098e-01)\n",
      "Epoch: [7][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0851e-01 (8.8122e-01)\n",
      "Epoch: [7][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2263e-01 (8.8141e-01)\n",
      "Epoch: [7][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1373e-01 (8.8163e-01)\n",
      "Epoch: [7][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2480e-01 (8.8184e-01)\n",
      "Epoch: [7][4400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0019e+00 (8.8202e-01)\n",
      "Epoch: [7][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7966e-01 (8.8211e-01)\n",
      "Epoch: [7][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5459e-01 (8.8251e-01)\n",
      "Epoch: [7][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8637e-01 (8.8255e-01)\n",
      "Epoch: [7][4600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9006e-01 (8.8269e-01)\n",
      "Epoch: [7][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0177e+00 (8.8281e-01)\n",
      "Epoch: [7][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3153e-01 (8.8304e-01)\n",
      "Epoch: [7][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8585e-01 (8.8331e-01)\n",
      "Epoch: [7][4800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5544e-01 (8.8334e-01)\n",
      "Epoch: [7][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0421e-01 (8.8319e-01)\n",
      "Epoch: [7][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2805e-01 (8.8376e-01)\n",
      "Epoch: [7][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4370e-01 (8.8405e-01)\n",
      "Epoch: [7][5000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1551e-01 (8.8429e-01)\n",
      "Test: [  0/196]\tTime  3.531 ( 3.531)\tLoss 6.4311e-01 (6.4311e-01)\tAcc@1  81.25 ( 81.25)\tAcc@5  96.48 ( 96.48)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.494)\tLoss 5.2169e-01 (7.8805e-01)\tAcc@1  86.33 ( 79.07)\tAcc@5  98.05 ( 94.84)\n",
      "Test: [100/196]\tTime  0.434 ( 0.464)\tLoss 1.2848e+00 (9.2090e-01)\tAcc@1  64.84 ( 76.05)\tAcc@5  88.67 ( 93.40)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.1316e+00 (1.0395e+00)\tAcc@1  76.17 ( 73.62)\tAcc@5  89.84 ( 91.86)\n",
      "epoch 7 0.8843046523647071 72.53199768066406 0.006000000000000002 4688969 0.19999437845311407\n",
      "Epoch: [8][   0/5005]\tTime  3.240 ( 3.240)\tData  2.602 ( 2.602)\tLoss 7.1038e-01 (7.1038e-01)\n",
      "Epoch: [8][  50/5005]\tTime  0.637 ( 0.689)\tData  0.000 ( 0.051)\tLoss 7.2394e-01 (8.4270e-01)\n",
      "Epoch: [8][ 100/5005]\tTime  0.637 ( 0.663)\tData  0.000 ( 0.026)\tLoss 7.5309e-01 (8.4470e-01)\n",
      "Epoch: [8][ 150/5005]\tTime  0.638 ( 0.655)\tData  0.000 ( 0.017)\tLoss 9.4516e-01 (8.4582e-01)\n",
      "Epoch: [8][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.013)\tLoss 7.7739e-01 (8.3968e-01)\n",
      "Epoch: [8][ 250/5005]\tTime  0.638 ( 0.648)\tData  0.000 ( 0.011)\tLoss 8.5414e-01 (8.4161e-01)\n",
      "Epoch: [8][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.009)\tLoss 7.3516e-01 (8.4797e-01)\n",
      "Epoch: [8][ 350/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 8.0095e-01 (8.4927e-01)\n",
      "Epoch: [8][ 400/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 8.4928e-01 (8.4805e-01)\n",
      "Epoch: [8][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.9186e-01 (8.5117e-01)\n",
      "Epoch: [8][ 500/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.005)\tLoss 7.7235e-01 (8.4977e-01)\n",
      "Epoch: [8][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.8152e-01 (8.4979e-01)\n",
      "Epoch: [8][ 600/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.8086e-01 (8.5063e-01)\n",
      "Epoch: [8][ 650/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.8780e-01 (8.5047e-01)\n",
      "Epoch: [8][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 9.2655e-01 (8.5005e-01)\n",
      "Epoch: [8][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.0232e-01 (8.5089e-01)\n",
      "Epoch: [8][ 800/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 9.5481e-01 (8.5213e-01)\n",
      "Epoch: [8][ 850/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.2220e-01 (8.5234e-01)\n",
      "Epoch: [8][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.5527e-01 (8.5223e-01)\n",
      "Epoch: [8][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.7705e-01 (8.5206e-01)\n",
      "Epoch: [8][1000/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.9653e-01 (8.5220e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [8][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6655e-01 (8.5243e-01)\n",
      "Epoch: [8][1100/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.5820e-01 (8.5243e-01)\n",
      "Epoch: [8][1150/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 7.7337e-01 (8.5413e-01)\n",
      "Epoch: [8][1200/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.6962e-01 (8.5391e-01)\n",
      "Epoch: [8][1250/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 7.5204e-01 (8.5403e-01)\n",
      "Epoch: [8][1300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0930e+00 (8.5568e-01)\n",
      "Epoch: [8][1350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0301e-01 (8.5676e-01)\n",
      "Epoch: [8][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3515e-01 (8.5699e-01)\n",
      "Epoch: [8][1450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2366e-01 (8.5693e-01)\n",
      "Epoch: [8][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9438e-01 (8.5733e-01)\n",
      "Epoch: [8][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.1066e-01 (8.5791e-01)\n",
      "Epoch: [8][1600/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.4659e-01 (8.5781e-01)\n",
      "Epoch: [8][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9006e-01 (8.5824e-01)\n",
      "Epoch: [8][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.8125e-01 (8.5852e-01)\n",
      "Epoch: [8][1750/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.9500e-01 (8.5851e-01)\n",
      "Epoch: [8][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0777e-01 (8.5896e-01)\n",
      "Epoch: [8][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1652e-01 (8.5950e-01)\n",
      "Epoch: [8][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.5402e-01 (8.5963e-01)\n",
      "Epoch: [8][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4517e-01 (8.5973e-01)\n",
      "Epoch: [8][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1667e-01 (8.6039e-01)\n",
      "Epoch: [8][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.3599e-01 (8.6084e-01)\n",
      "Epoch: [8][2100/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0171e+00 (8.6141e-01)\n",
      "Epoch: [8][2150/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.7335e-01 (8.6122e-01)\n",
      "Epoch: [8][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.6415e-01 (8.6177e-01)\n",
      "Epoch: [8][2250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.4853e-01 (8.6184e-01)\n",
      "Epoch: [8][2300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.3381e-01 (8.6169e-01)\n",
      "Epoch: [8][2350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.5700e-01 (8.6179e-01)\n",
      "Epoch: [8][2400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.3222e-01 (8.6177e-01)\n",
      "Epoch: [8][2450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.3589e-01 (8.6164e-01)\n",
      "Epoch: [8][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4703e-01 (8.6143e-01)\n",
      "Epoch: [8][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6665e-01 (8.6202e-01)\n",
      "Epoch: [8][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1106e+00 (8.6218e-01)\n",
      "Epoch: [8][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0902e+00 (8.6261e-01)\n",
      "Epoch: [8][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4187e-01 (8.6277e-01)\n",
      "Epoch: [8][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0015e+00 (8.6277e-01)\n",
      "Epoch: [8][2800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7052e-01 (8.6259e-01)\n",
      "Epoch: [8][2850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3545e-01 (8.6243e-01)\n",
      "Epoch: [8][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0070e-01 (8.6264e-01)\n",
      "Epoch: [8][2950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5568e-01 (8.6289e-01)\n",
      "Epoch: [8][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8506e-01 (8.6343e-01)\n",
      "Epoch: [8][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2946e-01 (8.6376e-01)\n",
      "Epoch: [8][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3745e-01 (8.6414e-01)\n",
      "Epoch: [8][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8312e-01 (8.6450e-01)\n",
      "Epoch: [8][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9464e-01 (8.6467e-01)\n",
      "Epoch: [8][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2964e-01 (8.6469e-01)\n",
      "Epoch: [8][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6306e-01 (8.6461e-01)\n",
      "Epoch: [8][3350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3250e-01 (8.6480e-01)\n",
      "Epoch: [8][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7305e-01 (8.6480e-01)\n",
      "Epoch: [8][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3322e-01 (8.6505e-01)\n",
      "Epoch: [8][3500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5622e-01 (8.6538e-01)\n",
      "Epoch: [8][3550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1920e-01 (8.6574e-01)\n",
      "Epoch: [8][3600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2056e-01 (8.6605e-01)\n",
      "Epoch: [8][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5282e-01 (8.6634e-01)\n",
      "Epoch: [8][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6322e-01 (8.6690e-01)\n",
      "Epoch: [8][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8702e-01 (8.6748e-01)\n",
      "Epoch: [8][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4442e-01 (8.6781e-01)\n",
      "Epoch: [8][3850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3074e-01 (8.6798e-01)\n",
      "Epoch: [8][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7039e-01 (8.6842e-01)\n",
      "Epoch: [8][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5023e-01 (8.6858e-01)\n",
      "Epoch: [8][4000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6979e-01 (8.6867e-01)\n",
      "Epoch: [8][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1125e-01 (8.6860e-01)\n",
      "Epoch: [8][4100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0269e+00 (8.6872e-01)\n",
      "Epoch: [8][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7298e-01 (8.6879e-01)\n",
      "Epoch: [8][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4452e-01 (8.6890e-01)\n",
      "Epoch: [8][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9721e-01 (8.6898e-01)\n",
      "Epoch: [8][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7555e-01 (8.6904e-01)\n",
      "Epoch: [8][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1923e-01 (8.6897e-01)\n",
      "Epoch: [8][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2343e-01 (8.6921e-01)\n",
      "Epoch: [8][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8718e-01 (8.6951e-01)\n",
      "Epoch: [8][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5061e-01 (8.6956e-01)\n",
      "Epoch: [8][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0869e+00 (8.6960e-01)\n",
      "Epoch: [8][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1541e-01 (8.6964e-01)\n",
      "Epoch: [8][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0716e-01 (8.6982e-01)\n",
      "Epoch: [8][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1390e-01 (8.6989e-01)\n",
      "Epoch: [8][4750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8698e-01 (8.6996e-01)\n",
      "Epoch: [8][4800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2174e-01 (8.7032e-01)\n",
      "Epoch: [8][4850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1236e-01 (8.7044e-01)\n",
      "Epoch: [8][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2792e-01 (8.7051e-01)\n",
      "Epoch: [8][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1930e-01 (8.7082e-01)\n",
      "Epoch: [8][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4834e-01 (8.7095e-01)\n",
      "Test: [  0/196]\tTime  3.551 ( 3.551)\tLoss 6.3024e-01 (6.3024e-01)\tAcc@1  84.38 ( 84.38)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.495)\tLoss 4.6214e-01 (7.8211e-01)\tAcc@1  89.06 ( 79.16)\tAcc@5  98.05 ( 94.98)\n",
      "Test: [100/196]\tTime  0.434 ( 0.465)\tLoss 1.2642e+00 (9.1129e-01)\tAcc@1  65.62 ( 76.40)\tAcc@5  90.23 ( 93.55)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.2030e+00 (1.0235e+00)\tAcc@1  73.44 ( 74.20)\tAcc@5  90.23 ( 92.08)\n",
      "epoch 8 0.8709808636735955 73.08799743652344 0.005500000000000002 4688969 0.19999437845311407\n",
      "Epoch: [9][   0/5005]\tTime  3.275 ( 3.275)\tData  2.633 ( 2.633)\tLoss 8.2721e-01 (8.2721e-01)\n",
      "Epoch: [9][  50/5005]\tTime  0.637 ( 0.689)\tData  0.000 ( 0.052)\tLoss 8.2047e-01 (8.3751e-01)\n",
      "Epoch: [9][ 100/5005]\tTime  0.637 ( 0.664)\tData  0.000 ( 0.026)\tLoss 8.5647e-01 (8.2531e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [9][ 150/5005]\tTime  0.638 ( 0.655)\tData  0.000 ( 0.018)\tLoss 7.7388e-01 (8.3128e-01)\n",
      "Epoch: [9][ 200/5005]\tTime  0.637 ( 0.651)\tData  0.000 ( 0.013)\tLoss 9.2806e-01 (8.3385e-01)\n",
      "Epoch: [9][ 250/5005]\tTime  0.638 ( 0.648)\tData  0.000 ( 0.011)\tLoss 8.4772e-01 (8.3441e-01)\n",
      "Epoch: [9][ 300/5005]\tTime  0.638 ( 0.646)\tData  0.000 ( 0.009)\tLoss 7.4619e-01 (8.3615e-01)\n",
      "Epoch: [9][ 350/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 8.0234e-01 (8.3785e-01)\n",
      "Epoch: [9][ 400/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.007)\tLoss 9.1168e-01 (8.3938e-01)\n",
      "Epoch: [9][ 450/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.6244e-01 (8.4161e-01)\n",
      "Epoch: [9][ 500/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.005)\tLoss 8.5830e-01 (8.3891e-01)\n",
      "Epoch: [9][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.5876e-01 (8.3743e-01)\n",
      "Epoch: [9][ 600/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.4209e-01 (8.3824e-01)\n",
      "Epoch: [9][ 650/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 8.8788e-01 (8.3764e-01)\n",
      "Epoch: [9][ 700/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.8304e-01 (8.3769e-01)\n",
      "Epoch: [9][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.6323e-01 (8.3895e-01)\n",
      "Epoch: [9][ 800/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.2243e-01 (8.3943e-01)\n",
      "Epoch: [9][ 850/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 8.8828e-01 (8.3984e-01)\n",
      "Epoch: [9][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.3538e-01 (8.4101e-01)\n",
      "Epoch: [9][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0238e+00 (8.4099e-01)\n",
      "Epoch: [9][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6658e-01 (8.4096e-01)\n",
      "Epoch: [9][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.2418e-01 (8.4064e-01)\n",
      "Epoch: [9][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6171e-01 (8.4131e-01)\n",
      "Epoch: [9][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 6.1069e-01 (8.4109e-01)\n",
      "Epoch: [9][1200/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 9.8408e-01 (8.4077e-01)\n",
      "Epoch: [9][1250/5005]\tTime  0.637 ( 0.640)\tData  0.001 ( 0.002)\tLoss 8.6667e-01 (8.4213e-01)\n",
      "Epoch: [9][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3274e-01 (8.4196e-01)\n",
      "Epoch: [9][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.8873e-01 (8.4301e-01)\n",
      "Epoch: [9][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3988e-01 (8.4322e-01)\n",
      "Epoch: [9][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.2805e-01 (8.4403e-01)\n",
      "Epoch: [9][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.5852e-01 (8.4457e-01)\n",
      "Epoch: [9][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.3625e-01 (8.4475e-01)\n",
      "Epoch: [9][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.5609e-01 (8.4532e-01)\n",
      "Epoch: [9][1650/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.9529e-01 (8.4529e-01)\n",
      "Epoch: [9][1700/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.7279e-01 (8.4549e-01)\n",
      "Epoch: [9][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.8010e-01 (8.4558e-01)\n",
      "Epoch: [9][1800/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.2353e-01 (8.4580e-01)\n",
      "Epoch: [9][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.3792e-01 (8.4568e-01)\n",
      "Epoch: [9][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.8329e-01 (8.4624e-01)\n",
      "Epoch: [9][1950/5005]\tTime  0.639 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0580e+00 (8.4698e-01)\n",
      "Epoch: [9][2000/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0565e+00 (8.4741e-01)\n",
      "Epoch: [9][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.3304e-01 (8.4788e-01)\n",
      "Epoch: [9][2100/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.0169e-01 (8.4830e-01)\n",
      "Epoch: [9][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.8836e-01 (8.4848e-01)\n",
      "Epoch: [9][2200/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.2010e-01 (8.4841e-01)\n",
      "Epoch: [9][2250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.4713e-01 (8.4886e-01)\n",
      "Epoch: [9][2300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0299e+00 (8.4905e-01)\n",
      "Epoch: [9][2350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.8655e-01 (8.4913e-01)\n",
      "Epoch: [9][2400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.3871e-01 (8.4952e-01)\n",
      "Epoch: [9][2450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.6335e-01 (8.5013e-01)\n",
      "Epoch: [9][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8675e-01 (8.4992e-01)\n",
      "Epoch: [9][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3316e-01 (8.5023e-01)\n",
      "Epoch: [9][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1051e-01 (8.5053e-01)\n",
      "Epoch: [9][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0069e+00 (8.5071e-01)\n",
      "Epoch: [9][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7066e-01 (8.5049e-01)\n",
      "Epoch: [9][2750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8936e-01 (8.5082e-01)\n",
      "Epoch: [9][2800/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0872e-01 (8.5069e-01)\n",
      "Epoch: [9][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1583e-01 (8.5094e-01)\n",
      "Epoch: [9][2900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9183e-01 (8.5162e-01)\n",
      "Epoch: [9][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3748e-01 (8.5190e-01)\n",
      "Epoch: [9][3000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0619e+00 (8.5224e-01)\n",
      "Epoch: [9][3050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1159e-01 (8.5251e-01)\n",
      "Epoch: [9][3100/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7640e-01 (8.5276e-01)\n",
      "Epoch: [9][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2504e-01 (8.5301e-01)\n",
      "Epoch: [9][3200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6626e-01 (8.5334e-01)\n",
      "Epoch: [9][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7855e-01 (8.5369e-01)\n",
      "Epoch: [9][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9317e-01 (8.5361e-01)\n",
      "Epoch: [9][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5763e-01 (8.5397e-01)\n",
      "Epoch: [9][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3706e-01 (8.5409e-01)\n",
      "Epoch: [9][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0255e+00 (8.5463e-01)\n",
      "Epoch: [9][3500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7372e-01 (8.5483e-01)\n",
      "Epoch: [9][3550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8612e-01 (8.5519e-01)\n",
      "Epoch: [9][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0214e+00 (8.5543e-01)\n",
      "Epoch: [9][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9045e-01 (8.5548e-01)\n",
      "Epoch: [9][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0646e+00 (8.5571e-01)\n",
      "Epoch: [9][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1622e-01 (8.5587e-01)\n",
      "Epoch: [9][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9298e-01 (8.5561e-01)\n",
      "Epoch: [9][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0547e-01 (8.5572e-01)\n",
      "Epoch: [9][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6469e-01 (8.5607e-01)\n",
      "Epoch: [9][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1750e-01 (8.5642e-01)\n",
      "Epoch: [9][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6125e-01 (8.5645e-01)\n",
      "Epoch: [9][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6620e-01 (8.5657e-01)\n",
      "Epoch: [9][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0061e+00 (8.5677e-01)\n",
      "Epoch: [9][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9369e-01 (8.5683e-01)\n",
      "Epoch: [9][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0015e-01 (8.5687e-01)\n",
      "Epoch: [9][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1170e-01 (8.5677e-01)\n",
      "Epoch: [9][4300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9203e-01 (8.5680e-01)\n",
      "Epoch: [9][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3470e-01 (8.5690e-01)\n",
      "Epoch: [9][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8920e-01 (8.5701e-01)\n",
      "Epoch: [9][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8551e-01 (8.5704e-01)\n",
      "Epoch: [9][4500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0908e-01 (8.5708e-01)\n",
      "Epoch: [9][4550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8956e-01 (8.5738e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [9][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4384e-01 (8.5745e-01)\n",
      "Epoch: [9][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9468e-01 (8.5777e-01)\n",
      "Epoch: [9][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4392e-01 (8.5807e-01)\n",
      "Epoch: [9][4750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6562e-01 (8.5841e-01)\n",
      "Epoch: [9][4800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6008e-01 (8.5878e-01)\n",
      "Epoch: [9][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7299e-01 (8.5885e-01)\n",
      "Epoch: [9][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5658e-01 (8.5899e-01)\n",
      "Epoch: [9][4950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7515e-01 (8.5906e-01)\n",
      "Epoch: [9][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0164e+00 (8.5930e-01)\n",
      "Test: [  0/196]\tTime  3.606 ( 3.606)\tLoss 6.0277e-01 (6.0277e-01)\tAcc@1  83.20 ( 83.20)\tAcc@5  96.88 ( 96.88)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.496)\tLoss 5.2775e-01 (7.7590e-01)\tAcc@1  85.16 ( 79.20)\tAcc@5  96.48 ( 94.99)\n",
      "Test: [100/196]\tTime  0.434 ( 0.465)\tLoss 1.3342e+00 (8.9730e-01)\tAcc@1  65.23 ( 76.61)\tAcc@5  89.06 ( 93.64)\n",
      "Test: [150/196]\tTime  0.434 ( 0.455)\tLoss 1.2593e+00 (1.0190e+00)\tAcc@1  72.66 ( 74.30)\tAcc@5  88.28 ( 92.01)\n",
      "epoch 9 0.859329387927115 73.44400024414062 0.005000000000000002 4688969 0.19999437845311407\n",
      "Epoch: [10][   0/5005]\tTime  3.134 ( 3.134)\tData  2.494 ( 2.494)\tLoss 9.5250e-01 (9.5250e-01)\n",
      "Epoch: [10][  50/5005]\tTime  0.637 ( 0.686)\tData  0.000 ( 0.049)\tLoss 8.6405e-01 (8.3861e-01)\n",
      "Epoch: [10][ 100/5005]\tTime  0.638 ( 0.662)\tData  0.000 ( 0.025)\tLoss 9.6276e-01 (8.2908e-01)\n",
      "Epoch: [10][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.017)\tLoss 8.9042e-01 (8.2952e-01)\n",
      "Epoch: [10][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.013)\tLoss 7.4980e-01 (8.2916e-01)\n",
      "Epoch: [10][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 8.5565e-01 (8.2538e-01)\n",
      "Epoch: [10][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.008)\tLoss 9.5864e-01 (8.2203e-01)\n",
      "Epoch: [10][ 350/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.007)\tLoss 9.6775e-01 (8.2034e-01)\n",
      "Epoch: [10][ 400/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.006)\tLoss 7.1653e-01 (8.2185e-01)\n",
      "Epoch: [10][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 1.0345e+00 (8.2599e-01)\n",
      "Epoch: [10][ 500/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.3029e-01 (8.2668e-01)\n",
      "Epoch: [10][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.8167e-01 (8.2525e-01)\n",
      "Epoch: [10][ 600/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.004)\tLoss 9.3848e-01 (8.2554e-01)\n",
      "Epoch: [10][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.6688e-01 (8.2750e-01)\n",
      "Epoch: [10][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.0378e-01 (8.2723e-01)\n",
      "Epoch: [10][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.8975e-01 (8.2844e-01)\n",
      "Epoch: [10][ 800/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 7.1864e-01 (8.2813e-01)\n",
      "Epoch: [10][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.6006e-01 (8.2814e-01)\n",
      "Epoch: [10][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.8786e-01 (8.2852e-01)\n",
      "Epoch: [10][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6447e-01 (8.2924e-01)\n",
      "Epoch: [10][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.5889e-01 (8.2946e-01)\n",
      "Epoch: [10][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.4334e-01 (8.2954e-01)\n",
      "Epoch: [10][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.6115e-01 (8.3053e-01)\n",
      "Epoch: [10][1150/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 6.2936e-01 (8.2970e-01)\n",
      "Epoch: [10][1200/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.8228e-01 (8.2975e-01)\n",
      "Epoch: [10][1250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0672e-01 (8.3017e-01)\n",
      "Epoch: [10][1300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.7535e-01 (8.3065e-01)\n",
      "Epoch: [10][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.4485e-01 (8.3112e-01)\n",
      "Epoch: [10][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8214e-01 (8.3107e-01)\n",
      "Epoch: [10][1450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.9134e-01 (8.3167e-01)\n",
      "Epoch: [10][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.9100e-01 (8.3240e-01)\n",
      "Epoch: [10][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0321e+00 (8.3374e-01)\n",
      "Epoch: [10][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0415e+00 (8.3397e-01)\n",
      "Epoch: [10][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3011e-01 (8.3389e-01)\n",
      "Epoch: [10][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8205e-01 (8.3429e-01)\n",
      "Epoch: [10][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.7190e-01 (8.3527e-01)\n",
      "Epoch: [10][1800/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3015e-01 (8.3587e-01)\n",
      "Epoch: [10][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.6662e-01 (8.3629e-01)\n",
      "Epoch: [10][1900/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7443e-01 (8.3668e-01)\n",
      "Epoch: [10][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.6383e-01 (8.3675e-01)\n",
      "Epoch: [10][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.1497e-01 (8.3658e-01)\n",
      "Epoch: [10][2050/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.1455e-01 (8.3651e-01)\n",
      "Epoch: [10][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 9.4488e-01 (8.3645e-01)\n",
      "Epoch: [10][2150/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.4607e-01 (8.3683e-01)\n",
      "Epoch: [10][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 1.0439e+00 (8.3696e-01)\n",
      "Epoch: [10][2250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 7.2698e-01 (8.3762e-01)\n",
      "Epoch: [10][2300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.6866e-01 (8.3739e-01)\n",
      "Epoch: [10][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3156e-01 (8.3725e-01)\n",
      "Epoch: [10][2400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0625e+00 (8.3743e-01)\n",
      "Epoch: [10][2450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1352e+00 (8.3767e-01)\n",
      "Epoch: [10][2500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6477e-01 (8.3815e-01)\n",
      "Epoch: [10][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1053e+00 (8.3818e-01)\n",
      "Epoch: [10][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7524e-01 (8.3854e-01)\n",
      "Epoch: [10][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5034e-01 (8.3832e-01)\n",
      "Epoch: [10][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7251e-01 (8.3845e-01)\n",
      "Epoch: [10][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5184e-01 (8.3856e-01)\n",
      "Epoch: [10][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2367e-01 (8.3904e-01)\n",
      "Epoch: [10][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0513e-01 (8.3942e-01)\n",
      "Epoch: [10][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3695e-01 (8.3964e-01)\n",
      "Epoch: [10][2950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1990e-01 (8.3973e-01)\n",
      "Epoch: [10][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5614e-01 (8.3970e-01)\n",
      "Epoch: [10][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0496e+00 (8.3999e-01)\n",
      "Epoch: [10][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6616e-01 (8.4004e-01)\n",
      "Epoch: [10][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9422e-01 (8.4023e-01)\n",
      "Epoch: [10][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4288e-01 (8.4037e-01)\n",
      "Epoch: [10][3250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3810e-01 (8.4054e-01)\n",
      "Epoch: [10][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3966e-01 (8.4095e-01)\n",
      "Epoch: [10][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3438e-01 (8.4088e-01)\n",
      "Epoch: [10][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5897e-01 (8.4110e-01)\n",
      "Epoch: [10][3450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2057e-01 (8.4120e-01)\n",
      "Epoch: [10][3500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2631e-01 (8.4151e-01)\n",
      "Epoch: [10][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4068e-01 (8.4207e-01)\n",
      "Epoch: [10][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0856e-01 (8.4186e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [10][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0794e-01 (8.4203e-01)\n",
      "Epoch: [10][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1449e-01 (8.4243e-01)\n",
      "Epoch: [10][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0175e-01 (8.4262e-01)\n",
      "Epoch: [10][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4198e-01 (8.4276e-01)\n",
      "Epoch: [10][3850/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1420e-01 (8.4312e-01)\n",
      "Epoch: [10][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1307e-01 (8.4333e-01)\n",
      "Epoch: [10][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.3701e-01 (8.4366e-01)\n",
      "Epoch: [10][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6451e-01 (8.4374e-01)\n",
      "Epoch: [10][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6338e-01 (8.4366e-01)\n",
      "Epoch: [10][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7548e-01 (8.4367e-01)\n",
      "Epoch: [10][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9432e-01 (8.4386e-01)\n",
      "Epoch: [10][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4307e-01 (8.4401e-01)\n",
      "Epoch: [10][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5021e-01 (8.4402e-01)\n",
      "Epoch: [10][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3520e-01 (8.4393e-01)\n",
      "Epoch: [10][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9479e-01 (8.4401e-01)\n",
      "Epoch: [10][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1778e-01 (8.4426e-01)\n",
      "Epoch: [10][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6520e-01 (8.4431e-01)\n",
      "Epoch: [10][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4029e-01 (8.4449e-01)\n",
      "Epoch: [10][4550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8932e-01 (8.4480e-01)\n",
      "Epoch: [10][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2372e-01 (8.4493e-01)\n",
      "Epoch: [10][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2599e-01 (8.4490e-01)\n",
      "Epoch: [10][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2338e-01 (8.4501e-01)\n",
      "Epoch: [10][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9358e-01 (8.4503e-01)\n",
      "Epoch: [10][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4611e-01 (8.4493e-01)\n",
      "Epoch: [10][4850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.3759e-01 (8.4499e-01)\n",
      "Epoch: [10][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2725e-01 (8.4514e-01)\n",
      "Epoch: [10][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1504e-01 (8.4511e-01)\n",
      "Epoch: [10][5000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3024e-01 (8.4529e-01)\n",
      "Test: [  0/196]\tTime  3.483 ( 3.483)\tLoss 5.8263e-01 (5.8263e-01)\tAcc@1  83.98 ( 83.98)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.493)\tLoss 4.8970e-01 (7.8553e-01)\tAcc@1  85.55 ( 79.02)\tAcc@5  97.66 ( 94.85)\n",
      "Test: [100/196]\tTime  0.433 ( 0.464)\tLoss 1.3545e+00 (9.0600e-01)\tAcc@1  62.89 ( 76.66)\tAcc@5  87.50 ( 93.50)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.2081e+00 (1.0174e+00)\tAcc@1  73.83 ( 74.35)\tAcc@5  89.06 ( 92.08)\n",
      "epoch 10 0.8452889708577439 73.3499984741211 0.004500000000000001 4688969 0.19999437845311407\n",
      "Epoch: [11][   0/5005]\tTime  3.345 ( 3.345)\tData  2.703 ( 2.703)\tLoss 9.4780e-01 (9.4780e-01)\n",
      "Epoch: [11][  50/5005]\tTime  0.637 ( 0.690)\tData  0.000 ( 0.053)\tLoss 1.0907e+00 (8.0844e-01)\n",
      "Epoch: [11][ 100/5005]\tTime  0.637 ( 0.664)\tData  0.000 ( 0.027)\tLoss 8.7668e-01 (8.1402e-01)\n",
      "Epoch: [11][ 150/5005]\tTime  0.637 ( 0.655)\tData  0.000 ( 0.018)\tLoss 8.6846e-01 (8.0499e-01)\n",
      "Epoch: [11][ 200/5005]\tTime  0.638 ( 0.651)\tData  0.000 ( 0.014)\tLoss 7.6219e-01 (8.0830e-01)\n",
      "Epoch: [11][ 250/5005]\tTime  0.637 ( 0.648)\tData  0.000 ( 0.011)\tLoss 8.1132e-01 (8.0786e-01)\n",
      "Epoch: [11][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.009)\tLoss 9.7371e-01 (8.1107e-01)\n",
      "Epoch: [11][ 350/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 9.8777e-01 (8.1609e-01)\n",
      "Epoch: [11][ 400/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 8.6290e-01 (8.1582e-01)\n",
      "Epoch: [11][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.1502e-01 (8.1580e-01)\n",
      "Epoch: [11][ 500/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.1867e-01 (8.1602e-01)\n",
      "Epoch: [11][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.3051e-01 (8.1494e-01)\n",
      "Epoch: [11][ 600/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.8568e-01 (8.1558e-01)\n",
      "Epoch: [11][ 650/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.5602e-01 (8.1601e-01)\n",
      "Epoch: [11][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.0880e-01 (8.1585e-01)\n",
      "Epoch: [11][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.3045e-01 (8.1609e-01)\n",
      "Epoch: [11][ 800/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.9015e-01 (8.1567e-01)\n",
      "Epoch: [11][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.3828e-01 (8.1519e-01)\n",
      "Epoch: [11][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.4812e-01 (8.1493e-01)\n",
      "Epoch: [11][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6415e-01 (8.1521e-01)\n",
      "Epoch: [11][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.2696e-01 (8.1590e-01)\n",
      "Epoch: [11][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.3263e-01 (8.1550e-01)\n",
      "Epoch: [11][1100/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.2860e-01 (8.1498e-01)\n",
      "Epoch: [11][1150/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.9959e-01 (8.1493e-01)\n",
      "Epoch: [11][1200/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.002)\tLoss 8.1142e-01 (8.1493e-01)\n",
      "Epoch: [11][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.9925e-01 (8.1448e-01)\n",
      "Epoch: [11][1300/5005]\tTime  0.633 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.2242e-01 (8.1507e-01)\n",
      "Epoch: [11][1350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0282e+00 (8.1479e-01)\n",
      "Epoch: [11][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.9885e-01 (8.1482e-01)\n",
      "Epoch: [11][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.5666e-01 (8.1518e-01)\n",
      "Epoch: [11][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.1870e-01 (8.1557e-01)\n",
      "Epoch: [11][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3386e-01 (8.1547e-01)\n",
      "Epoch: [11][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6591e-01 (8.1560e-01)\n",
      "Epoch: [11][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3254e-01 (8.1497e-01)\n",
      "Epoch: [11][1700/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.5707e-01 (8.1501e-01)\n",
      "Epoch: [11][1750/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6045e-01 (8.1480e-01)\n",
      "Epoch: [11][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.0702e-01 (8.1517e-01)\n",
      "Epoch: [11][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.8937e-01 (8.1588e-01)\n",
      "Epoch: [11][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6519e-01 (8.1657e-01)\n",
      "Epoch: [11][1950/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0840e-01 (8.1739e-01)\n",
      "Epoch: [11][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.4724e-01 (8.1793e-01)\n",
      "Epoch: [11][2050/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3844e-01 (8.1774e-01)\n",
      "Epoch: [11][2100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.7061e-01 (8.1817e-01)\n",
      "Epoch: [11][2150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 6.6768e-01 (8.1863e-01)\n",
      "Epoch: [11][2200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.1768e-01 (8.1873e-01)\n",
      "Epoch: [11][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0507e-01 (8.1901e-01)\n",
      "Epoch: [11][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1111e-01 (8.1910e-01)\n",
      "Epoch: [11][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4992e-01 (8.1881e-01)\n",
      "Epoch: [11][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3018e-01 (8.1913e-01)\n",
      "Epoch: [11][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6595e-01 (8.1946e-01)\n",
      "Epoch: [11][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4204e-01 (8.1954e-01)\n",
      "Epoch: [11][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2731e-01 (8.1944e-01)\n",
      "Epoch: [11][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9022e-01 (8.1993e-01)\n",
      "Epoch: [11][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4202e-01 (8.1973e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [11][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4252e-01 (8.1982e-01)\n",
      "Epoch: [11][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0845e-01 (8.1996e-01)\n",
      "Epoch: [11][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9469e-01 (8.2040e-01)\n",
      "Epoch: [11][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7019e-01 (8.2025e-01)\n",
      "Epoch: [11][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6894e-01 (8.2037e-01)\n",
      "Epoch: [11][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9686e-01 (8.2046e-01)\n",
      "Epoch: [11][3000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9584e-01 (8.2098e-01)\n",
      "Epoch: [11][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9485e-01 (8.2116e-01)\n",
      "Epoch: [11][3100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9534e-01 (8.2133e-01)\n",
      "Epoch: [11][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7423e-01 (8.2128e-01)\n",
      "Epoch: [11][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3653e-01 (8.2148e-01)\n",
      "Epoch: [11][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2783e-01 (8.2198e-01)\n",
      "Epoch: [11][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7643e-01 (8.2217e-01)\n",
      "Epoch: [11][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0353e-01 (8.2225e-01)\n",
      "Epoch: [11][3400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0007e-01 (8.2222e-01)\n",
      "Epoch: [11][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7652e-01 (8.2234e-01)\n",
      "Epoch: [11][3500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9558e-01 (8.2255e-01)\n",
      "Epoch: [11][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1772e-01 (8.2283e-01)\n",
      "Epoch: [11][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2571e-01 (8.2328e-01)\n",
      "Epoch: [11][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0626e+00 (8.2356e-01)\n",
      "Epoch: [11][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4606e-01 (8.2411e-01)\n",
      "Epoch: [11][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9592e-01 (8.2434e-01)\n",
      "Epoch: [11][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1847e-01 (8.2433e-01)\n",
      "Epoch: [11][3850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7082e-01 (8.2451e-01)\n",
      "Epoch: [11][3900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3025e-01 (8.2447e-01)\n",
      "Epoch: [11][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.6794e-01 (8.2481e-01)\n",
      "Epoch: [11][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1227e-01 (8.2484e-01)\n",
      "Epoch: [11][4050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9382e-01 (8.2486e-01)\n",
      "Epoch: [11][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5797e-01 (8.2487e-01)\n",
      "Epoch: [11][4150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0552e-01 (8.2511e-01)\n",
      "Epoch: [11][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3635e-01 (8.2515e-01)\n",
      "Epoch: [11][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3246e-01 (8.2520e-01)\n",
      "Epoch: [11][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5481e-01 (8.2529e-01)\n",
      "Epoch: [11][4350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0982e-01 (8.2521e-01)\n",
      "Epoch: [11][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5660e-01 (8.2564e-01)\n",
      "Epoch: [11][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1101e+00 (8.2552e-01)\n",
      "Epoch: [11][4500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1767e-01 (8.2559e-01)\n",
      "Epoch: [11][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9214e-01 (8.2575e-01)\n",
      "Epoch: [11][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6800e-01 (8.2577e-01)\n",
      "Epoch: [11][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1789e-01 (8.2599e-01)\n",
      "Epoch: [11][4700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8687e-01 (8.2629e-01)\n",
      "Epoch: [11][4750/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0032e+00 (8.2646e-01)\n",
      "Epoch: [11][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8576e-01 (8.2679e-01)\n",
      "Epoch: [11][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4016e-01 (8.2699e-01)\n",
      "Epoch: [11][4900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7848e-01 (8.2728e-01)\n",
      "Epoch: [11][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0204e+00 (8.2744e-01)\n",
      "Epoch: [11][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4903e-01 (8.2767e-01)\n",
      "Test: [  0/196]\tTime  3.490 ( 3.490)\tLoss 5.6654e-01 (5.6654e-01)\tAcc@1  83.98 ( 83.98)\tAcc@5  97.66 ( 97.66)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.493)\tLoss 5.6992e-01 (7.6582e-01)\tAcc@1  85.16 ( 79.27)\tAcc@5  97.27 ( 95.08)\n",
      "Test: [100/196]\tTime  0.434 ( 0.464)\tLoss 1.1236e+00 (8.9068e-01)\tAcc@1  65.23 ( 76.61)\tAcc@5  92.58 ( 93.77)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.1394e+00 (1.0080e+00)\tAcc@1  73.83 ( 74.35)\tAcc@5  89.06 ( 92.27)\n",
      "epoch 11 0.8277365617901092 73.40999603271484 0.004000000000000002 4688969 0.19999437845311407\n",
      "Epoch: [12][   0/5005]\tTime  3.186 ( 3.186)\tData  2.544 ( 2.544)\tLoss 7.7160e-01 (7.7160e-01)\n",
      "Epoch: [12][  50/5005]\tTime  0.637 ( 0.688)\tData  0.000 ( 0.050)\tLoss 7.9259e-01 (8.0787e-01)\n",
      "Epoch: [12][ 100/5005]\tTime  0.638 ( 0.663)\tData  0.000 ( 0.025)\tLoss 9.5574e-01 (8.0374e-01)\n",
      "Epoch: [12][ 150/5005]\tTime  0.638 ( 0.654)\tData  0.000 ( 0.017)\tLoss 8.2630e-01 (7.9716e-01)\n",
      "Epoch: [12][ 200/5005]\tTime  0.637 ( 0.650)\tData  0.000 ( 0.013)\tLoss 7.5531e-01 (7.9605e-01)\n",
      "Epoch: [12][ 250/5005]\tTime  0.637 ( 0.648)\tData  0.000 ( 0.010)\tLoss 7.8226e-01 (7.9824e-01)\n",
      "Epoch: [12][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.009)\tLoss 7.9502e-01 (7.9552e-01)\n",
      "Epoch: [12][ 350/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.007)\tLoss 7.3287e-01 (7.9697e-01)\n",
      "Epoch: [12][ 400/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.007)\tLoss 7.8944e-01 (7.9893e-01)\n",
      "Epoch: [12][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 7.1231e-01 (7.9934e-01)\n",
      "Epoch: [12][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.0150e-01 (7.9960e-01)\n",
      "Epoch: [12][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.1517e-01 (7.9978e-01)\n",
      "Epoch: [12][ 600/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.004)\tLoss 8.2293e-01 (7.9968e-01)\n",
      "Epoch: [12][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.6730e-01 (7.9998e-01)\n",
      "Epoch: [12][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.2012e-01 (8.0031e-01)\n",
      "Epoch: [12][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.3781e-01 (8.0045e-01)\n",
      "Epoch: [12][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.3686e-01 (7.9987e-01)\n",
      "Epoch: [12][ 850/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.4070e-01 (8.0105e-01)\n",
      "Epoch: [12][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.6993e-01 (8.0075e-01)\n",
      "Epoch: [12][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.1811e-01 (8.0105e-01)\n",
      "Epoch: [12][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.8969e-01 (8.0070e-01)\n",
      "Epoch: [12][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.1289e-01 (8.0028e-01)\n",
      "Epoch: [12][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.9461e-01 (8.0060e-01)\n",
      "Epoch: [12][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.1246e-01 (8.0158e-01)\n",
      "Epoch: [12][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.4749e-01 (8.0218e-01)\n",
      "Epoch: [12][1250/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.2564e-01 (8.0236e-01)\n",
      "Epoch: [12][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.3892e-01 (8.0265e-01)\n",
      "Epoch: [12][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6321e-01 (8.0336e-01)\n",
      "Epoch: [12][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.0627e+00 (8.0338e-01)\n",
      "Epoch: [12][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.9750e-01 (8.0465e-01)\n",
      "Epoch: [12][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4918e-01 (8.0422e-01)\n",
      "Epoch: [12][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.5719e-01 (8.0410e-01)\n",
      "Epoch: [12][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3909e-01 (8.0402e-01)\n",
      "Epoch: [12][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.2565e-01 (8.0453e-01)\n",
      "Epoch: [12][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.3795e-01 (8.0501e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [12][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.8599e-01 (8.0490e-01)\n",
      "Epoch: [12][1800/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.2585e-01 (8.0488e-01)\n",
      "Epoch: [12][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.4108e-01 (8.0500e-01)\n",
      "Epoch: [12][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.9772e-01 (8.0529e-01)\n",
      "Epoch: [12][1950/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3613e-01 (8.0570e-01)\n",
      "Epoch: [12][2000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.0488e-01 (8.0664e-01)\n",
      "Epoch: [12][2050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 6.5995e-01 (8.0719e-01)\n",
      "Epoch: [12][2100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6367e-01 (8.0708e-01)\n",
      "Epoch: [12][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.3531e-01 (8.0717e-01)\n",
      "Epoch: [12][2200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5730e-01 (8.0734e-01)\n",
      "Epoch: [12][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1411e-01 (8.0698e-01)\n",
      "Epoch: [12][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1896e-01 (8.0666e-01)\n",
      "Epoch: [12][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3263e-01 (8.0652e-01)\n",
      "Epoch: [12][2400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7838e-01 (8.0684e-01)\n",
      "Epoch: [12][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2146e-01 (8.0683e-01)\n",
      "Epoch: [12][2500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5805e-01 (8.0684e-01)\n",
      "Epoch: [12][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7158e-01 (8.0722e-01)\n",
      "Epoch: [12][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2940e-01 (8.0715e-01)\n",
      "Epoch: [12][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1198e-01 (8.0739e-01)\n",
      "Epoch: [12][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4056e-01 (8.0770e-01)\n",
      "Epoch: [12][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.1190e+00 (8.0782e-01)\n",
      "Epoch: [12][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.4209e-01 (8.0749e-01)\n",
      "Epoch: [12][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7574e-01 (8.0804e-01)\n",
      "Epoch: [12][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6953e-01 (8.0784e-01)\n",
      "Epoch: [12][2950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6545e-01 (8.0797e-01)\n",
      "Epoch: [12][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0132e+00 (8.0850e-01)\n",
      "Epoch: [12][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3272e-01 (8.0870e-01)\n",
      "Epoch: [12][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0840e-01 (8.0882e-01)\n",
      "Epoch: [12][3150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1144e-01 (8.0893e-01)\n",
      "Epoch: [12][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8667e-01 (8.0891e-01)\n",
      "Epoch: [12][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6807e-01 (8.0871e-01)\n",
      "Epoch: [12][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6886e-01 (8.0889e-01)\n",
      "Epoch: [12][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4801e-01 (8.0908e-01)\n",
      "Epoch: [12][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7142e-01 (8.0912e-01)\n",
      "Epoch: [12][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0156e-01 (8.0904e-01)\n",
      "Epoch: [12][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.4561e-01 (8.0911e-01)\n",
      "Epoch: [12][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9162e-01 (8.0911e-01)\n",
      "Epoch: [12][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8877e-01 (8.0910e-01)\n",
      "Epoch: [12][3650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3041e-01 (8.0887e-01)\n",
      "Epoch: [12][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4564e-01 (8.0911e-01)\n",
      "Epoch: [12][3750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6028e-01 (8.0932e-01)\n",
      "Epoch: [12][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7515e-01 (8.0933e-01)\n",
      "Epoch: [12][3850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.9008e-01 (8.0975e-01)\n",
      "Epoch: [12][3900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8369e-01 (8.0976e-01)\n",
      "Epoch: [12][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7770e-01 (8.0998e-01)\n",
      "Epoch: [12][4000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0082e-01 (8.1012e-01)\n",
      "Epoch: [12][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5555e-01 (8.1043e-01)\n",
      "Epoch: [12][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1035e-01 (8.1036e-01)\n",
      "Epoch: [12][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8966e-01 (8.1061e-01)\n",
      "Epoch: [12][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.6784e-01 (8.1069e-01)\n",
      "Epoch: [12][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5369e-01 (8.1089e-01)\n",
      "Epoch: [12][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3753e-01 (8.1083e-01)\n",
      "Epoch: [12][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0908e-01 (8.1106e-01)\n",
      "Epoch: [12][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6036e-01 (8.1102e-01)\n",
      "Epoch: [12][4450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1033e-01 (8.1119e-01)\n",
      "Epoch: [12][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5566e-01 (8.1149e-01)\n",
      "Epoch: [12][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3022e-01 (8.1137e-01)\n",
      "Epoch: [12][4600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1698e-01 (8.1153e-01)\n",
      "Epoch: [12][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0894e-01 (8.1148e-01)\n",
      "Epoch: [12][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0869e-01 (8.1164e-01)\n",
      "Epoch: [12][4750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2764e-01 (8.1178e-01)\n",
      "Epoch: [12][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3642e-01 (8.1201e-01)\n",
      "Epoch: [12][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2648e-01 (8.1180e-01)\n",
      "Epoch: [12][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0720e-01 (8.1176e-01)\n",
      "Epoch: [12][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4112e-01 (8.1182e-01)\n",
      "Epoch: [12][5000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1221e-01 (8.1204e-01)\n",
      "Test: [  0/196]\tTime  3.602 ( 3.602)\tLoss 5.8813e-01 (5.8813e-01)\tAcc@1  82.81 ( 82.81)\tAcc@5  96.88 ( 96.88)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.496)\tLoss 4.9877e-01 (7.3783e-01)\tAcc@1  85.94 ( 80.31)\tAcc@5  96.88 ( 95.37)\n",
      "Test: [100/196]\tTime  0.434 ( 0.465)\tLoss 1.2538e+00 (8.6748e-01)\tAcc@1  63.67 ( 77.29)\tAcc@5  92.19 ( 94.10)\n",
      "Test: [150/196]\tTime  0.434 ( 0.455)\tLoss 1.1602e+00 (9.8271e-01)\tAcc@1  73.83 ( 74.96)\tAcc@5  89.45 ( 92.55)\n",
      "epoch 12 0.8120099425109105 74.04000091552734 0.003500000000000001 4688969 0.19999437845311407\n",
      "Epoch: [13][   0/5005]\tTime  3.031 ( 3.031)\tData  2.392 ( 2.392)\tLoss 8.0598e-01 (8.0598e-01)\n",
      "Epoch: [13][  50/5005]\tTime  0.638 ( 0.684)\tData  0.000 ( 0.047)\tLoss 7.7834e-01 (7.9772e-01)\n",
      "Epoch: [13][ 100/5005]\tTime  0.637 ( 0.661)\tData  0.000 ( 0.024)\tLoss 7.5010e-01 (7.9672e-01)\n",
      "Epoch: [13][ 150/5005]\tTime  0.637 ( 0.653)\tData  0.000 ( 0.016)\tLoss 7.9726e-01 (7.9775e-01)\n",
      "Epoch: [13][ 200/5005]\tTime  0.639 ( 0.649)\tData  0.000 ( 0.012)\tLoss 8.0893e-01 (7.9031e-01)\n",
      "Epoch: [13][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 6.2751e-01 (7.8714e-01)\n",
      "Epoch: [13][ 300/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 8.1747e-01 (7.8591e-01)\n",
      "Epoch: [13][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 5.9716e-01 (7.8696e-01)\n",
      "Epoch: [13][ 400/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.006)\tLoss 7.3750e-01 (7.8269e-01)\n",
      "Epoch: [13][ 450/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.006)\tLoss 8.3883e-01 (7.8115e-01)\n",
      "Epoch: [13][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 9.3354e-01 (7.8021e-01)\n",
      "Epoch: [13][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.0132e-01 (7.8129e-01)\n",
      "Epoch: [13][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.0184e-01 (7.8046e-01)\n",
      "Epoch: [13][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.3363e-01 (7.8006e-01)\n",
      "Epoch: [13][ 700/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.1927e-01 (7.8027e-01)\n",
      "Epoch: [13][ 750/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.1087e-01 (7.8053e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [13][ 800/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.7114e-01 (7.8093e-01)\n",
      "Epoch: [13][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.8406e-01 (7.8171e-01)\n",
      "Epoch: [13][ 900/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 1.0437e+00 (7.8210e-01)\n",
      "Epoch: [13][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.4869e-01 (7.8322e-01)\n",
      "Epoch: [13][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.1496e-01 (7.8335e-01)\n",
      "Epoch: [13][1050/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 7.2343e-01 (7.8272e-01)\n",
      "Epoch: [13][1100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 1.1341e+00 (7.8341e-01)\n",
      "Epoch: [13][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.8597e-01 (7.8291e-01)\n",
      "Epoch: [13][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3110e-01 (7.8167e-01)\n",
      "Epoch: [13][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3331e-01 (7.8141e-01)\n",
      "Epoch: [13][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.7640e-01 (7.8111e-01)\n",
      "Epoch: [13][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.8652e-01 (7.8153e-01)\n",
      "Epoch: [13][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.4283e-01 (7.8151e-01)\n",
      "Epoch: [13][1450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.1527e-01 (7.8189e-01)\n",
      "Epoch: [13][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.3982e-01 (7.8281e-01)\n",
      "Epoch: [13][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.2615e-01 (7.8269e-01)\n",
      "Epoch: [13][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0902e-01 (7.8327e-01)\n",
      "Epoch: [13][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.4805e-01 (7.8445e-01)\n",
      "Epoch: [13][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.0484e-01 (7.8443e-01)\n",
      "Epoch: [13][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.2163e-01 (7.8546e-01)\n",
      "Epoch: [13][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.4425e-01 (7.8555e-01)\n",
      "Epoch: [13][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 8.1653e-01 (7.8597e-01)\n",
      "Epoch: [13][1900/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 6.6486e-01 (7.8608e-01)\n",
      "Epoch: [13][1950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.8759e-01 (7.8629e-01)\n",
      "Epoch: [13][2000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2688e-01 (7.8694e-01)\n",
      "Epoch: [13][2050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3484e-01 (7.8681e-01)\n",
      "Epoch: [13][2100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6885e-01 (7.8736e-01)\n",
      "Epoch: [13][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4941e-01 (7.8771e-01)\n",
      "Epoch: [13][2200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3381e-01 (7.8779e-01)\n",
      "Epoch: [13][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8779e-01 (7.8793e-01)\n",
      "Epoch: [13][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 1.0239e+00 (7.8847e-01)\n",
      "Epoch: [13][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5041e-01 (7.8871e-01)\n",
      "Epoch: [13][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4577e-01 (7.8890e-01)\n",
      "Epoch: [13][2450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4950e-01 (7.8891e-01)\n",
      "Epoch: [13][2500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4018e-01 (7.8962e-01)\n",
      "Epoch: [13][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2404e-01 (7.8979e-01)\n",
      "Epoch: [13][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9900e-01 (7.8946e-01)\n",
      "Epoch: [13][2650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0850e-01 (7.8945e-01)\n",
      "Epoch: [13][2700/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6900e-01 (7.8923e-01)\n",
      "Epoch: [13][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8786e-01 (7.8896e-01)\n",
      "Epoch: [13][2800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7544e-01 (7.8938e-01)\n",
      "Epoch: [13][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3520e-01 (7.8942e-01)\n",
      "Epoch: [13][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3573e-01 (7.8946e-01)\n",
      "Epoch: [13][2950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8557e-01 (7.8945e-01)\n",
      "Epoch: [13][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0037e-01 (7.8997e-01)\n",
      "Epoch: [13][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9431e-01 (7.9012e-01)\n",
      "Epoch: [13][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0366e-01 (7.9019e-01)\n",
      "Epoch: [13][3150/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1021e-01 (7.9054e-01)\n",
      "Epoch: [13][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8423e-01 (7.9075e-01)\n",
      "Epoch: [13][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7854e-01 (7.9092e-01)\n",
      "Epoch: [13][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0659e-01 (7.9103e-01)\n",
      "Epoch: [13][3350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3041e-01 (7.9113e-01)\n",
      "Epoch: [13][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9320e-01 (7.9175e-01)\n",
      "Epoch: [13][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5443e-01 (7.9182e-01)\n",
      "Epoch: [13][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8191e-01 (7.9210e-01)\n",
      "Epoch: [13][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1966e-01 (7.9174e-01)\n",
      "Epoch: [13][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8244e-01 (7.9219e-01)\n",
      "Epoch: [13][3650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0424e-01 (7.9265e-01)\n",
      "Epoch: [13][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5296e-01 (7.9238e-01)\n",
      "Epoch: [13][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8290e-01 (7.9248e-01)\n",
      "Epoch: [13][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0868e-01 (7.9232e-01)\n",
      "Epoch: [13][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0402e-01 (7.9239e-01)\n",
      "Epoch: [13][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6562e-01 (7.9245e-01)\n",
      "Epoch: [13][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4241e-01 (7.9254e-01)\n",
      "Epoch: [13][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5759e-01 (7.9267e-01)\n",
      "Epoch: [13][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.3500e-01 (7.9273e-01)\n",
      "Epoch: [13][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8800e-01 (7.9288e-01)\n",
      "Epoch: [13][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.8596e-01 (7.9306e-01)\n",
      "Epoch: [13][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8424e-01 (7.9341e-01)\n",
      "Epoch: [13][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6223e-01 (7.9346e-01)\n",
      "Epoch: [13][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0916e-01 (7.9346e-01)\n",
      "Epoch: [13][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4606e-01 (7.9374e-01)\n",
      "Epoch: [13][4400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9578e-01 (7.9373e-01)\n",
      "Epoch: [13][4450/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2488e-01 (7.9362e-01)\n",
      "Epoch: [13][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4671e-01 (7.9400e-01)\n",
      "Epoch: [13][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5345e-01 (7.9427e-01)\n",
      "Epoch: [13][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5837e-01 (7.9431e-01)\n",
      "Epoch: [13][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0851e-01 (7.9427e-01)\n",
      "Epoch: [13][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1734e-01 (7.9443e-01)\n",
      "Epoch: [13][4750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3465e-01 (7.9453e-01)\n",
      "Epoch: [13][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3553e-01 (7.9490e-01)\n",
      "Epoch: [13][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0893e-01 (7.9521e-01)\n",
      "Epoch: [13][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8848e-01 (7.9535e-01)\n",
      "Epoch: [13][4950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1292e-01 (7.9559e-01)\n",
      "Epoch: [13][5000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4137e-01 (7.9569e-01)\n",
      "Test: [  0/196]\tTime  3.552 ( 3.552)\tLoss 5.4794e-01 (5.4794e-01)\tAcc@1  83.59 ( 83.59)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.495)\tLoss 4.8059e-01 (7.3333e-01)\tAcc@1  87.11 ( 80.27)\tAcc@5  96.88 ( 95.31)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test: [100/196]\tTime  0.434 ( 0.465)\tLoss 1.2843e+00 (8.5941e-01)\tAcc@1  62.89 ( 77.54)\tAcc@5  89.45 ( 93.92)\n",
      "Test: [150/196]\tTime  0.434 ( 0.455)\tLoss 1.1253e+00 (9.7315e-01)\tAcc@1  75.78 ( 75.36)\tAcc@5  90.23 ( 92.50)\n",
      "epoch 13 0.7956984436878033 74.4020004272461 0.0030000000000000014 4688969 0.19999437845311407\n",
      "Epoch: [14][   0/5005]\tTime  3.107 ( 3.107)\tData  2.465 ( 2.465)\tLoss 8.9047e-01 (8.9047e-01)\n",
      "Epoch: [14][  50/5005]\tTime  0.637 ( 0.686)\tData  0.000 ( 0.049)\tLoss 8.6609e-01 (7.7221e-01)\n",
      "Epoch: [14][ 100/5005]\tTime  0.637 ( 0.662)\tData  0.000 ( 0.025)\tLoss 6.7207e-01 (7.7990e-01)\n",
      "Epoch: [14][ 150/5005]\tTime  0.637 ( 0.654)\tData  0.000 ( 0.017)\tLoss 7.7865e-01 (7.7834e-01)\n",
      "Epoch: [14][ 200/5005]\tTime  0.638 ( 0.650)\tData  0.000 ( 0.012)\tLoss 8.4829e-01 (7.7596e-01)\n",
      "Epoch: [14][ 250/5005]\tTime  0.638 ( 0.647)\tData  0.000 ( 0.010)\tLoss 8.3656e-01 (7.7490e-01)\n",
      "Epoch: [14][ 300/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.008)\tLoss 6.8149e-01 (7.7033e-01)\n",
      "Epoch: [14][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 5.9743e-01 (7.6780e-01)\n",
      "Epoch: [14][ 400/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.006)\tLoss 8.9669e-01 (7.6615e-01)\n",
      "Epoch: [14][ 450/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 6.1375e-01 (7.6628e-01)\n",
      "Epoch: [14][ 500/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.1753e-01 (7.6483e-01)\n",
      "Epoch: [14][ 550/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.4341e-01 (7.6425e-01)\n",
      "Epoch: [14][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.4932e-01 (7.6482e-01)\n",
      "Epoch: [14][ 650/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.2315e-01 (7.6448e-01)\n",
      "Epoch: [14][ 700/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.3650e-01 (7.6444e-01)\n",
      "Epoch: [14][ 750/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.003)\tLoss 6.9006e-01 (7.6472e-01)\n",
      "Epoch: [14][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.5859e-01 (7.6450e-01)\n",
      "Epoch: [14][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.4396e-01 (7.6520e-01)\n",
      "Epoch: [14][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.4725e-01 (7.6489e-01)\n",
      "Epoch: [14][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 9.1664e-01 (7.6563e-01)\n",
      "Epoch: [14][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.3424e-01 (7.6731e-01)\n",
      "Epoch: [14][1050/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.1678e-01 (7.6780e-01)\n",
      "Epoch: [14][1100/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.002)\tLoss 7.1462e-01 (7.6720e-01)\n",
      "Epoch: [14][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.1922e-01 (7.6689e-01)\n",
      "Epoch: [14][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.1428e-01 (7.6737e-01)\n",
      "Epoch: [14][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.9322e-01 (7.6729e-01)\n",
      "Epoch: [14][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 5.9747e-01 (7.6707e-01)\n",
      "Epoch: [14][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.8707e-01 (7.6750e-01)\n",
      "Epoch: [14][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.3683e-01 (7.6725e-01)\n",
      "Epoch: [14][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.1380e-01 (7.6762e-01)\n",
      "Epoch: [14][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.5506e-01 (7.6797e-01)\n",
      "Epoch: [14][1550/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7120e-01 (7.6813e-01)\n",
      "Epoch: [14][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.7242e-01 (7.6794e-01)\n",
      "Epoch: [14][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6692e-01 (7.6806e-01)\n",
      "Epoch: [14][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.8314e-01 (7.6864e-01)\n",
      "Epoch: [14][1750/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6946e-01 (7.6863e-01)\n",
      "Epoch: [14][1800/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.5799e-01 (7.6875e-01)\n",
      "Epoch: [14][1850/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3946e-01 (7.6972e-01)\n",
      "Epoch: [14][1900/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.2268e-01 (7.6957e-01)\n",
      "Epoch: [14][1950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1556e-01 (7.6935e-01)\n",
      "Epoch: [14][2000/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.3907e-01 (7.6969e-01)\n",
      "Epoch: [14][2050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7832e-01 (7.7023e-01)\n",
      "Epoch: [14][2100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4388e-01 (7.7030e-01)\n",
      "Epoch: [14][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8991e-01 (7.6984e-01)\n",
      "Epoch: [14][2200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3192e-01 (7.6978e-01)\n",
      "Epoch: [14][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.7022e-01 (7.6971e-01)\n",
      "Epoch: [14][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.0933e-01 (7.6991e-01)\n",
      "Epoch: [14][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2375e-01 (7.7021e-01)\n",
      "Epoch: [14][2400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5814e-01 (7.7075e-01)\n",
      "Epoch: [14][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3186e-01 (7.7075e-01)\n",
      "Epoch: [14][2500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6308e-01 (7.7079e-01)\n",
      "Epoch: [14][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5992e-01 (7.7060e-01)\n",
      "Epoch: [14][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1097e-01 (7.7073e-01)\n",
      "Epoch: [14][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5615e-01 (7.7101e-01)\n",
      "Epoch: [14][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.1346e-01 (7.7119e-01)\n",
      "Epoch: [14][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5462e-01 (7.7110e-01)\n",
      "Epoch: [14][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1229e-01 (7.7139e-01)\n",
      "Epoch: [14][2850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.1760e-01 (7.7165e-01)\n",
      "Epoch: [14][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9279e-01 (7.7177e-01)\n",
      "Epoch: [14][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8605e-01 (7.7197e-01)\n",
      "Epoch: [14][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9453e-01 (7.7180e-01)\n",
      "Epoch: [14][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4189e-01 (7.7167e-01)\n",
      "Epoch: [14][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.8465e-01 (7.7191e-01)\n",
      "Epoch: [14][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4347e-01 (7.7186e-01)\n",
      "Epoch: [14][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3520e-01 (7.7210e-01)\n",
      "Epoch: [14][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0338e-01 (7.7255e-01)\n",
      "Epoch: [14][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8326e-01 (7.7281e-01)\n",
      "Epoch: [14][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.5898e-01 (7.7289e-01)\n",
      "Epoch: [14][3400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.6015e-01 (7.7304e-01)\n",
      "Epoch: [14][3450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2333e-01 (7.7315e-01)\n",
      "Epoch: [14][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6093e-01 (7.7330e-01)\n",
      "Epoch: [14][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1214e-01 (7.7320e-01)\n",
      "Epoch: [14][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2298e-01 (7.7328e-01)\n",
      "Epoch: [14][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0196e-01 (7.7334e-01)\n",
      "Epoch: [14][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.3867e-01 (7.7339e-01)\n",
      "Epoch: [14][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4622e-01 (7.7337e-01)\n",
      "Epoch: [14][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2853e-01 (7.7326e-01)\n",
      "Epoch: [14][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1210e-01 (7.7338e-01)\n",
      "Epoch: [14][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5526e-01 (7.7372e-01)\n",
      "Epoch: [14][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4331e-01 (7.7382e-01)\n",
      "Epoch: [14][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2352e-01 (7.7399e-01)\n",
      "Epoch: [14][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6329e-01 (7.7417e-01)\n",
      "Epoch: [14][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1362e-01 (7.7417e-01)\n",
      "Epoch: [14][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.6399e-01 (7.7430e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [14][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8685e-01 (7.7438e-01)\n",
      "Epoch: [14][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4248e-01 (7.7444e-01)\n",
      "Epoch: [14][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6234e-01 (7.7451e-01)\n",
      "Epoch: [14][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0747e-01 (7.7462e-01)\n",
      "Epoch: [14][4400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5365e-01 (7.7492e-01)\n",
      "Epoch: [14][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8695e-01 (7.7505e-01)\n",
      "Epoch: [14][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5963e-01 (7.7521e-01)\n",
      "Epoch: [14][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.3392e-01 (7.7532e-01)\n",
      "Epoch: [14][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4514e-01 (7.7543e-01)\n",
      "Epoch: [14][4650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5981e-01 (7.7533e-01)\n",
      "Epoch: [14][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9918e-01 (7.7563e-01)\n",
      "Epoch: [14][4750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1107e-01 (7.7593e-01)\n",
      "Epoch: [14][4800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.4428e-01 (7.7569e-01)\n",
      "Epoch: [14][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7994e-01 (7.7569e-01)\n",
      "Epoch: [14][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0147e-01 (7.7608e-01)\n",
      "Epoch: [14][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1893e-01 (7.7629e-01)\n",
      "Epoch: [14][5000/5005]\tTime  0.635 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9888e-01 (7.7637e-01)\n",
      "Test: [  0/196]\tTime  3.564 ( 3.564)\tLoss 5.7869e-01 (5.7869e-01)\tAcc@1  83.98 ( 83.98)\tAcc@5  97.66 ( 97.66)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.495)\tLoss 4.1275e-01 (7.3577e-01)\tAcc@1  88.67 ( 80.55)\tAcc@5  98.44 ( 95.29)\n",
      "Test: [100/196]\tTime  0.434 ( 0.464)\tLoss 1.3585e+00 (8.6431e-01)\tAcc@1  61.72 ( 77.60)\tAcc@5  91.02 ( 94.09)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.1969e+00 (9.8386e-01)\tAcc@1  74.22 ( 75.20)\tAcc@5  87.89 ( 92.58)\n",
      "epoch 14 0.7764010144394812 74.06199645996094 0.002500000000000001 4688969 0.19999437845311407\n",
      "Epoch: [15][   0/5005]\tTime  3.075 ( 3.075)\tData  2.433 ( 2.433)\tLoss 8.0146e-01 (8.0146e-01)\n",
      "Epoch: [15][  50/5005]\tTime  0.636 ( 0.685)\tData  0.000 ( 0.048)\tLoss 7.7435e-01 (7.3062e-01)\n",
      "Epoch: [15][ 100/5005]\tTime  0.636 ( 0.661)\tData  0.000 ( 0.024)\tLoss 7.6238e-01 (7.3524e-01)\n",
      "Epoch: [15][ 150/5005]\tTime  0.636 ( 0.653)\tData  0.000 ( 0.016)\tLoss 7.1839e-01 (7.3876e-01)\n",
      "Epoch: [15][ 200/5005]\tTime  0.637 ( 0.649)\tData  0.000 ( 0.012)\tLoss 7.2408e-01 (7.3880e-01)\n",
      "Epoch: [15][ 250/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.010)\tLoss 7.5715e-01 (7.4177e-01)\n",
      "Epoch: [15][ 300/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 7.3042e-01 (7.4149e-01)\n",
      "Epoch: [15][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 6.6305e-01 (7.4076e-01)\n",
      "Epoch: [15][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 6.8878e-01 (7.3981e-01)\n",
      "Epoch: [15][ 450/5005]\tTime  0.636 ( 0.642)\tData  0.000 ( 0.006)\tLoss 8.1730e-01 (7.4244e-01)\n",
      "Epoch: [15][ 500/5005]\tTime  0.636 ( 0.642)\tData  0.000 ( 0.005)\tLoss 8.0820e-01 (7.4265e-01)\n",
      "Epoch: [15][ 550/5005]\tTime  0.636 ( 0.641)\tData  0.000 ( 0.005)\tLoss 7.9867e-01 (7.4281e-01)\n",
      "Epoch: [15][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.1781e-01 (7.4302e-01)\n",
      "Epoch: [15][ 650/5005]\tTime  0.636 ( 0.641)\tData  0.000 ( 0.004)\tLoss 6.5005e-01 (7.4319e-01)\n",
      "Epoch: [15][ 700/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.004)\tLoss 8.8945e-01 (7.4498e-01)\n",
      "Epoch: [15][ 750/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.8701e-01 (7.4581e-01)\n",
      "Epoch: [15][ 800/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.4168e-01 (7.4627e-01)\n",
      "Epoch: [15][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.5705e-01 (7.4745e-01)\n",
      "Epoch: [15][ 900/5005]\tTime  0.636 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.9130e-01 (7.4736e-01)\n",
      "Epoch: [15][ 950/5005]\tTime  0.662 ( 0.639)\tData  0.000 ( 0.003)\tLoss 5.5956e-01 (7.4744e-01)\n",
      "Epoch: [15][1000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.003)\tLoss 8.4546e-01 (7.4762e-01)\n",
      "Epoch: [15][1050/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.003)\tLoss 7.6636e-01 (7.4629e-01)\n",
      "Epoch: [15][1100/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.9623e-01 (7.4672e-01)\n",
      "Epoch: [15][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.1686e-01 (7.4707e-01)\n",
      "Epoch: [15][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.3870e-01 (7.4701e-01)\n",
      "Epoch: [15][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 9.2788e-01 (7.4803e-01)\n",
      "Epoch: [15][1300/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.7839e-01 (7.4825e-01)\n",
      "Epoch: [15][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.8717e-01 (7.4882e-01)\n",
      "Epoch: [15][1400/5005]\tTime  0.639 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6594e-01 (7.4851e-01)\n",
      "Epoch: [15][1450/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.7756e-01 (7.4863e-01)\n",
      "Epoch: [15][1500/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.3018e-01 (7.4816e-01)\n",
      "Epoch: [15][1550/5005]\tTime  0.635 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.3736e-01 (7.4811e-01)\n",
      "Epoch: [15][1600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.4967e-01 (7.4882e-01)\n",
      "Epoch: [15][1650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.5886e-01 (7.4874e-01)\n",
      "Epoch: [15][1700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 6.1460e-01 (7.4850e-01)\n",
      "Epoch: [15][1750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.8146e-01 (7.4836e-01)\n",
      "Epoch: [15][1800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.2368e-01 (7.4852e-01)\n",
      "Epoch: [15][1850/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.002)\tLoss 8.9966e-01 (7.4870e-01)\n",
      "Epoch: [15][1900/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7468e-01 (7.4931e-01)\n",
      "Epoch: [15][1950/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7901e-01 (7.4961e-01)\n",
      "Epoch: [15][2000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5036e-01 (7.4979e-01)\n",
      "Epoch: [15][2050/5005]\tTime  0.635 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5440e-01 (7.4978e-01)\n",
      "Epoch: [15][2100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4535e-01 (7.5007e-01)\n",
      "Epoch: [15][2150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0465e-01 (7.5033e-01)\n",
      "Epoch: [15][2200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3700e-01 (7.5024e-01)\n",
      "Epoch: [15][2250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6853e-01 (7.5015e-01)\n",
      "Epoch: [15][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5938e-01 (7.5021e-01)\n",
      "Epoch: [15][2350/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5239e-01 (7.5064e-01)\n",
      "Epoch: [15][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4727e-01 (7.5094e-01)\n",
      "Epoch: [15][2450/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2486e-01 (7.5092e-01)\n",
      "Epoch: [15][2500/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.3420e-01 (7.5106e-01)\n",
      "Epoch: [15][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1090e-01 (7.5085e-01)\n",
      "Epoch: [15][2600/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.0708e-01 (7.5093e-01)\n",
      "Epoch: [15][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3757e-01 (7.5094e-01)\n",
      "Epoch: [15][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1382e-01 (7.5104e-01)\n",
      "Epoch: [15][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8403e-01 (7.5127e-01)\n",
      "Epoch: [15][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7608e-01 (7.5128e-01)\n",
      "Epoch: [15][2850/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1294e-01 (7.5155e-01)\n",
      "Epoch: [15][2900/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1438e-01 (7.5185e-01)\n",
      "Epoch: [15][2950/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.2423e-01 (7.5177e-01)\n",
      "Epoch: [15][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8673e-01 (7.5192e-01)\n",
      "Epoch: [15][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3100e-01 (7.5211e-01)\n",
      "Epoch: [15][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7445e-01 (7.5202e-01)\n",
      "Epoch: [15][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1531e-01 (7.5230e-01)\n",
      "Epoch: [15][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5277e-01 (7.5239e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [15][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6953e-01 (7.5239e-01)\n",
      "Epoch: [15][3300/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4507e-01 (7.5247e-01)\n",
      "Epoch: [15][3350/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8329e-01 (7.5277e-01)\n",
      "Epoch: [15][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7836e-01 (7.5260e-01)\n",
      "Epoch: [15][3450/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7244e-01 (7.5284e-01)\n",
      "Epoch: [15][3500/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.8272e-01 (7.5288e-01)\n",
      "Epoch: [15][3550/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 9.0002e-01 (7.5286e-01)\n",
      "Epoch: [15][3600/5005]\tTime  0.639 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.2855e-01 (7.5299e-01)\n",
      "Epoch: [15][3650/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.4898e-01 (7.5313e-01)\n",
      "Epoch: [15][3700/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.8688e-01 (7.5317e-01)\n",
      "Epoch: [15][3750/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.0611e-01 (7.5358e-01)\n",
      "Epoch: [15][3800/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.5044e-01 (7.5356e-01)\n",
      "Epoch: [15][3850/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.1295e-01 (7.5382e-01)\n",
      "Epoch: [15][3900/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.5104e-01 (7.5392e-01)\n",
      "Epoch: [15][3950/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.7157e-01 (7.5402e-01)\n",
      "Epoch: [15][4000/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.7556e-01 (7.5398e-01)\n",
      "Epoch: [15][4050/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.1335e-01 (7.5394e-01)\n",
      "Epoch: [15][4100/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.0816e-01 (7.5416e-01)\n",
      "Epoch: [15][4150/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.6954e-01 (7.5407e-01)\n",
      "Epoch: [15][4200/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.3024e-01 (7.5425e-01)\n",
      "Epoch: [15][4250/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.7473e-01 (7.5429e-01)\n",
      "Epoch: [15][4300/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.2929e-01 (7.5441e-01)\n",
      "Epoch: [15][4350/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 9.1434e-01 (7.5434e-01)\n",
      "Epoch: [15][4400/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.8731e-01 (7.5429e-01)\n",
      "Epoch: [15][4450/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.6813e-01 (7.5431e-01)\n",
      "Epoch: [15][4500/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.7634e-01 (7.5449e-01)\n",
      "Epoch: [15][4550/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.8222e-01 (7.5446e-01)\n",
      "Epoch: [15][4600/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.4172e-01 (7.5435e-01)\n",
      "Epoch: [15][4650/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.6284e-01 (7.5450e-01)\n",
      "Epoch: [15][4700/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.2471e-01 (7.5461e-01)\n",
      "Epoch: [15][4750/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.3295e-01 (7.5476e-01)\n",
      "Epoch: [15][4800/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.4080e-01 (7.5474e-01)\n",
      "Epoch: [15][4850/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.2525e-01 (7.5474e-01)\n",
      "Epoch: [15][4900/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.7238e-01 (7.5486e-01)\n",
      "Epoch: [15][4950/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.5455e-01 (7.5491e-01)\n",
      "Epoch: [15][5000/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.1105e-01 (7.5500e-01)\n",
      "Test: [  0/196]\tTime  3.452 ( 3.452)\tLoss 5.1443e-01 (5.1443e-01)\tAcc@1  85.16 ( 85.16)\tAcc@5  98.44 ( 98.44)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.492)\tLoss 3.9797e-01 (7.0876e-01)\tAcc@1  89.45 ( 80.75)\tAcc@5  98.83 ( 95.69)\n",
      "Test: [100/196]\tTime  0.434 ( 0.463)\tLoss 1.2676e+00 (8.3881e-01)\tAcc@1  63.28 ( 77.87)\tAcc@5  90.23 ( 94.28)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.0722e+00 (9.5676e-01)\tAcc@1  77.34 ( 75.50)\tAcc@5  89.06 ( 92.79)\n",
      "epoch 15 0.7549782979855137 74.5199966430664 0.0020000000000000005 4688969 0.19999437845311407\n",
      "Epoch: [16][   0/5005]\tTime  3.151 ( 3.151)\tData  2.509 ( 2.509)\tLoss 7.4438e-01 (7.4438e-01)\n",
      "Epoch: [16][  50/5005]\tTime  0.637 ( 0.686)\tData  0.000 ( 0.049)\tLoss 7.8919e-01 (7.3096e-01)\n",
      "Epoch: [16][ 100/5005]\tTime  0.636 ( 0.662)\tData  0.000 ( 0.025)\tLoss 8.0281e-01 (7.3657e-01)\n",
      "Epoch: [16][ 150/5005]\tTime  0.637 ( 0.653)\tData  0.000 ( 0.017)\tLoss 7.0023e-01 (7.3483e-01)\n",
      "Epoch: [16][ 200/5005]\tTime  0.637 ( 0.649)\tData  0.000 ( 0.013)\tLoss 7.0522e-01 (7.3555e-01)\n",
      "Epoch: [16][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 7.7848e-01 (7.3188e-01)\n",
      "Epoch: [16][ 300/5005]\tTime  0.638 ( 0.645)\tData  0.000 ( 0.009)\tLoss 7.2957e-01 (7.3469e-01)\n",
      "Epoch: [16][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 7.3221e-01 (7.3478e-01)\n",
      "Epoch: [16][ 400/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.006)\tLoss 7.6331e-01 (7.3427e-01)\n",
      "Epoch: [16][ 450/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.006)\tLoss 6.8116e-01 (7.3284e-01)\n",
      "Epoch: [16][ 500/5005]\tTime  0.636 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.1450e-01 (7.3212e-01)\n",
      "Epoch: [16][ 550/5005]\tTime  0.636 ( 0.641)\tData  0.000 ( 0.005)\tLoss 8.3765e-01 (7.3156e-01)\n",
      "Epoch: [16][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 7.5352e-01 (7.3254e-01)\n",
      "Epoch: [16][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 6.8376e-01 (7.3219e-01)\n",
      "Epoch: [16][ 700/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.004)\tLoss 7.9219e-01 (7.3346e-01)\n",
      "Epoch: [16][ 750/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.004)\tLoss 6.8850e-01 (7.3300e-01)\n",
      "Epoch: [16][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.8791e-01 (7.3350e-01)\n",
      "Epoch: [16][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.2928e-01 (7.3408e-01)\n",
      "Epoch: [16][ 900/5005]\tTime  0.636 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.9598e-01 (7.3421e-01)\n",
      "Epoch: [16][ 950/5005]\tTime  0.636 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.9212e-01 (7.3429e-01)\n",
      "Epoch: [16][1000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.003)\tLoss 7.0083e-01 (7.3451e-01)\n",
      "Epoch: [16][1050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.003)\tLoss 6.5808e-01 (7.3482e-01)\n",
      "Epoch: [16][1100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.003)\tLoss 7.6970e-01 (7.3427e-01)\n",
      "Epoch: [16][1150/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.1775e-01 (7.3446e-01)\n",
      "Epoch: [16][1200/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.0598e-01 (7.3389e-01)\n",
      "Epoch: [16][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.8732e-01 (7.3439e-01)\n",
      "Epoch: [16][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 5.9998e-01 (7.3315e-01)\n",
      "Epoch: [16][1350/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.5296e-01 (7.3309e-01)\n",
      "Epoch: [16][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.5843e-01 (7.3384e-01)\n",
      "Epoch: [16][1450/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.1615e-01 (7.3362e-01)\n",
      "Epoch: [16][1500/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.002)\tLoss 6.5673e-01 (7.3407e-01)\n",
      "Epoch: [16][1550/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.4161e-01 (7.3404e-01)\n",
      "Epoch: [16][1600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.1711e-01 (7.3467e-01)\n",
      "Epoch: [16][1650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.002)\tLoss 8.3002e-01 (7.3413e-01)\n",
      "Epoch: [16][1700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.2291e-01 (7.3403e-01)\n",
      "Epoch: [16][1750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.9303e-01 (7.3381e-01)\n",
      "Epoch: [16][1800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.4751e-01 (7.3396e-01)\n",
      "Epoch: [16][1850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 6.9303e-01 (7.3407e-01)\n",
      "Epoch: [16][1900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.5856e-01 (7.3450e-01)\n",
      "Epoch: [16][1950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.1189e-01 (7.3464e-01)\n",
      "Epoch: [16][2000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2851e-01 (7.3490e-01)\n",
      "Epoch: [16][2050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5660e-01 (7.3520e-01)\n",
      "Epoch: [16][2100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.1780e-01 (7.3589e-01)\n",
      "Epoch: [16][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.6332e-01 (7.3570e-01)\n",
      "Epoch: [16][2200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1035e-01 (7.3590e-01)\n",
      "Epoch: [16][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8629e-01 (7.3540e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [16][2300/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.6840e-01 (7.3538e-01)\n",
      "Epoch: [16][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1336e-01 (7.3518e-01)\n",
      "Epoch: [16][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1633e-01 (7.3516e-01)\n",
      "Epoch: [16][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5199e-01 (7.3542e-01)\n",
      "Epoch: [16][2500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.3613e-01 (7.3528e-01)\n",
      "Epoch: [16][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3832e-01 (7.3536e-01)\n",
      "Epoch: [16][2600/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0721e-01 (7.3539e-01)\n",
      "Epoch: [16][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.8951e-01 (7.3548e-01)\n",
      "Epoch: [16][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2704e-01 (7.3559e-01)\n",
      "Epoch: [16][2750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.7731e-01 (7.3574e-01)\n",
      "Epoch: [16][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8668e-01 (7.3585e-01)\n",
      "Epoch: [16][2850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9684e-01 (7.3553e-01)\n",
      "Epoch: [16][2900/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.9640e-01 (7.3548e-01)\n",
      "Epoch: [16][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5437e-01 (7.3593e-01)\n",
      "Epoch: [16][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4528e-01 (7.3602e-01)\n",
      "Epoch: [16][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4678e-01 (7.3602e-01)\n",
      "Epoch: [16][3100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0704e-01 (7.3602e-01)\n",
      "Epoch: [16][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.0491e-01 (7.3594e-01)\n",
      "Epoch: [16][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.3861e-01 (7.3619e-01)\n",
      "Epoch: [16][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2339e-01 (7.3592e-01)\n",
      "Epoch: [16][3300/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6944e-01 (7.3563e-01)\n",
      "Epoch: [16][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6864e-01 (7.3588e-01)\n",
      "Epoch: [16][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8516e-01 (7.3609e-01)\n",
      "Epoch: [16][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5547e-01 (7.3619e-01)\n",
      "Epoch: [16][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1834e-01 (7.3611e-01)\n",
      "Epoch: [16][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5915e-01 (7.3624e-01)\n",
      "Epoch: [16][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1478e-01 (7.3605e-01)\n",
      "Epoch: [16][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3140e-01 (7.3610e-01)\n",
      "Epoch: [16][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8470e-01 (7.3625e-01)\n",
      "Epoch: [16][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0685e-01 (7.3631e-01)\n",
      "Epoch: [16][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5199e-01 (7.3627e-01)\n",
      "Epoch: [16][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2982e-01 (7.3622e-01)\n",
      "Epoch: [16][3900/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.6297e-01 (7.3637e-01)\n",
      "Epoch: [16][3950/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.4157e-01 (7.3641e-01)\n",
      "Epoch: [16][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.8374e-01 (7.3669e-01)\n",
      "Epoch: [16][4050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2063e-01 (7.3678e-01)\n",
      "Epoch: [16][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.6292e-01 (7.3714e-01)\n",
      "Epoch: [16][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.1264e-01 (7.3715e-01)\n",
      "Epoch: [16][4200/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.4206e-01 (7.3716e-01)\n",
      "Epoch: [16][4250/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.8337e-01 (7.3702e-01)\n",
      "Epoch: [16][4300/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.4400e-01 (7.3705e-01)\n",
      "Epoch: [16][4350/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.7921e-01 (7.3722e-01)\n",
      "Epoch: [16][4400/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.4963e-01 (7.3735e-01)\n",
      "Epoch: [16][4450/5005]\tTime  0.639 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.9356e-01 (7.3740e-01)\n",
      "Epoch: [16][4500/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.8143e-01 (7.3746e-01)\n",
      "Epoch: [16][4550/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.8012e-01 (7.3727e-01)\n",
      "Epoch: [16][4600/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.8661e-01 (7.3756e-01)\n",
      "Epoch: [16][4650/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.0362e-01 (7.3759e-01)\n",
      "Epoch: [16][4700/5005]\tTime  0.635 ( 0.637)\tData  0.000 ( 0.001)\tLoss 9.6016e-01 (7.3783e-01)\n",
      "Epoch: [16][4750/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.9320e-01 (7.3784e-01)\n",
      "Epoch: [16][4800/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.8358e-01 (7.3752e-01)\n",
      "Epoch: [16][4850/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.3867e-01 (7.3740e-01)\n",
      "Epoch: [16][4900/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.0298e-01 (7.3763e-01)\n",
      "Epoch: [16][4950/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.7781e-01 (7.3772e-01)\n",
      "Epoch: [16][5000/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.0313e-01 (7.3772e-01)\n",
      "Test: [  0/196]\tTime  3.263 ( 3.263)\tLoss 5.4622e-01 (5.4622e-01)\tAcc@1  84.77 ( 84.77)\tAcc@5  97.27 ( 97.27)\n",
      "Test: [ 50/196]\tTime  0.433 ( 0.489)\tLoss 3.6336e-01 (7.0763e-01)\tAcc@1  91.02 ( 81.20)\tAcc@5  98.05 ( 95.65)\n",
      "Test: [100/196]\tTime  0.434 ( 0.461)\tLoss 1.1592e+00 (8.3766e-01)\tAcc@1  65.23 ( 78.15)\tAcc@5  91.41 ( 94.28)\n",
      "Test: [150/196]\tTime  0.434 ( 0.452)\tLoss 1.0793e+00 (9.4973e-01)\tAcc@1  76.56 ( 75.94)\tAcc@5  90.23 ( 92.86)\n",
      "epoch 16 0.7377035005529793 75.04999542236328 0.0015000000000000011 4688969 0.19999437845311407\n",
      "Epoch: [17][   0/5005]\tTime  2.959 ( 2.959)\tData  2.317 ( 2.317)\tLoss 7.7992e-01 (7.7992e-01)\n",
      "Epoch: [17][  50/5005]\tTime  0.637 ( 0.683)\tData  0.000 ( 0.046)\tLoss 6.5084e-01 (6.9179e-01)\n",
      "Epoch: [17][ 100/5005]\tTime  0.637 ( 0.660)\tData  0.000 ( 0.023)\tLoss 7.0166e-01 (7.0268e-01)\n",
      "Epoch: [17][ 150/5005]\tTime  0.637 ( 0.653)\tData  0.000 ( 0.016)\tLoss 7.2681e-01 (7.0769e-01)\n",
      "Epoch: [17][ 200/5005]\tTime  0.637 ( 0.649)\tData  0.000 ( 0.012)\tLoss 5.3764e-01 (7.0330e-01)\n",
      "Epoch: [17][ 250/5005]\tTime  0.637 ( 0.646)\tData  0.000 ( 0.009)\tLoss 6.8342e-01 (7.0183e-01)\n",
      "Epoch: [17][ 300/5005]\tTime  0.638 ( 0.645)\tData  0.000 ( 0.008)\tLoss 7.2014e-01 (7.0414e-01)\n",
      "Epoch: [17][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 6.6155e-01 (7.0331e-01)\n",
      "Epoch: [17][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 7.3642e-01 (7.0543e-01)\n",
      "Epoch: [17][ 450/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 6.5371e-01 (7.0664e-01)\n",
      "Epoch: [17][ 500/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 6.5034e-01 (7.0805e-01)\n",
      "Epoch: [17][ 550/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 8.9364e-01 (7.1042e-01)\n",
      "Epoch: [17][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 6.7303e-01 (7.1147e-01)\n",
      "Epoch: [17][ 650/5005]\tTime  0.638 ( 0.641)\tData  0.000 ( 0.004)\tLoss 6.6820e-01 (7.1071e-01)\n",
      "Epoch: [17][ 700/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.004)\tLoss 7.3223e-01 (7.1056e-01)\n",
      "Epoch: [17][ 750/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.3869e-01 (7.1021e-01)\n",
      "Epoch: [17][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.2921e-01 (7.1045e-01)\n",
      "Epoch: [17][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.3545e-01 (7.1064e-01)\n",
      "Epoch: [17][ 900/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 5.5635e-01 (7.1008e-01)\n",
      "Epoch: [17][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.0896e-01 (7.1049e-01)\n",
      "Epoch: [17][1000/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 8.0290e-01 (7.1078e-01)\n",
      "Epoch: [17][1050/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.4117e-01 (7.1091e-01)\n",
      "Epoch: [17][1100/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.4324e-01 (7.1038e-01)\n",
      "Epoch: [17][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 5.7926e-01 (7.1086e-01)\n",
      "Epoch: [17][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.6880e-01 (7.1080e-01)\n",
      "Epoch: [17][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.7001e-01 (7.1099e-01)\n",
      "Epoch: [17][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6676e-01 (7.1046e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [17][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.2281e-01 (7.1093e-01)\n",
      "Epoch: [17][1400/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.7459e-01 (7.1043e-01)\n",
      "Epoch: [17][1450/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.4311e-01 (7.1077e-01)\n",
      "Epoch: [17][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.4292e-01 (7.1078e-01)\n",
      "Epoch: [17][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.2949e-01 (7.1033e-01)\n",
      "Epoch: [17][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.1723e-01 (7.1030e-01)\n",
      "Epoch: [17][1650/5005]\tTime  0.639 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.3143e-01 (7.1073e-01)\n",
      "Epoch: [17][1700/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.7572e-01 (7.1092e-01)\n",
      "Epoch: [17][1750/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.3542e-01 (7.1076e-01)\n",
      "Epoch: [17][1800/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 5.9880e-01 (7.1117e-01)\n",
      "Epoch: [17][1850/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.001)\tLoss 6.4098e-01 (7.1137e-01)\n",
      "Epoch: [17][1900/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.001)\tLoss 6.9283e-01 (7.1125e-01)\n",
      "Epoch: [17][1950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.2159e-01 (7.1140e-01)\n",
      "Epoch: [17][2000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5574e-01 (7.1179e-01)\n",
      "Epoch: [17][2050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5310e-01 (7.1237e-01)\n",
      "Epoch: [17][2100/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.5549e-01 (7.1270e-01)\n",
      "Epoch: [17][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0955e-01 (7.1240e-01)\n",
      "Epoch: [17][2200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8239e-01 (7.1257e-01)\n",
      "Epoch: [17][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.0129e-01 (7.1215e-01)\n",
      "Epoch: [17][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4729e-01 (7.1186e-01)\n",
      "Epoch: [17][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5593e-01 (7.1164e-01)\n",
      "Epoch: [17][2400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.0482e-01 (7.1178e-01)\n",
      "Epoch: [17][2450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5623e-01 (7.1184e-01)\n",
      "Epoch: [17][2500/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2180e-01 (7.1149e-01)\n",
      "Epoch: [17][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.7795e-01 (7.1147e-01)\n",
      "Epoch: [17][2600/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0765e-01 (7.1164e-01)\n",
      "Epoch: [17][2650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3401e-01 (7.1169e-01)\n",
      "Epoch: [17][2700/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8508e-01 (7.1187e-01)\n",
      "Epoch: [17][2750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1496e-01 (7.1214e-01)\n",
      "Epoch: [17][2800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4323e-01 (7.1195e-01)\n",
      "Epoch: [17][2850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9035e-01 (7.1216e-01)\n",
      "Epoch: [17][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.8463e-01 (7.1217e-01)\n",
      "Epoch: [17][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7217e-01 (7.1190e-01)\n",
      "Epoch: [17][3000/5005]\tTime  0.635 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0242e-01 (7.1197e-01)\n",
      "Epoch: [17][3050/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2756e-01 (7.1205e-01)\n",
      "Epoch: [17][3100/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6479e-01 (7.1254e-01)\n",
      "Epoch: [17][3150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9921e-01 (7.1284e-01)\n",
      "Epoch: [17][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8504e-01 (7.1309e-01)\n",
      "Epoch: [17][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1184e-01 (7.1308e-01)\n",
      "Epoch: [17][3300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.6483e-01 (7.1304e-01)\n",
      "Epoch: [17][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6448e-01 (7.1304e-01)\n",
      "Epoch: [17][3400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7297e-01 (7.1293e-01)\n",
      "Epoch: [17][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7448e-01 (7.1317e-01)\n",
      "Epoch: [17][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.6109e-01 (7.1331e-01)\n",
      "Epoch: [17][3550/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2077e-01 (7.1349e-01)\n",
      "Epoch: [17][3600/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5093e-01 (7.1328e-01)\n",
      "Epoch: [17][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.0751e-01 (7.1349e-01)\n",
      "Epoch: [17][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.9163e-01 (7.1375e-01)\n",
      "Epoch: [17][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3161e-01 (7.1419e-01)\n",
      "Epoch: [17][3800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.5831e-01 (7.1392e-01)\n",
      "Epoch: [17][3850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.6905e-01 (7.1384e-01)\n",
      "Epoch: [17][3900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7125e-01 (7.1392e-01)\n",
      "Epoch: [17][3950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.4641e-01 (7.1429e-01)\n",
      "Epoch: [17][4000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3021e-01 (7.1460e-01)\n",
      "Epoch: [17][4050/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8939e-01 (7.1493e-01)\n",
      "Epoch: [17][4100/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7803e-01 (7.1492e-01)\n",
      "Epoch: [17][4150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7073e-01 (7.1495e-01)\n",
      "Epoch: [17][4200/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6908e-01 (7.1498e-01)\n",
      "Epoch: [17][4250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4183e-01 (7.1510e-01)\n",
      "Epoch: [17][4300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.8063e-01 (7.1500e-01)\n",
      "Epoch: [17][4350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.7680e-01 (7.1484e-01)\n",
      "Epoch: [17][4400/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0546e-01 (7.1474e-01)\n",
      "Epoch: [17][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5935e-01 (7.1471e-01)\n",
      "Epoch: [17][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8595e-01 (7.1479e-01)\n",
      "Epoch: [17][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4836e-01 (7.1484e-01)\n",
      "Epoch: [17][4600/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9887e-01 (7.1493e-01)\n",
      "Epoch: [17][4650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.6374e-01 (7.1479e-01)\n",
      "Epoch: [17][4700/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8035e-01 (7.1485e-01)\n",
      "Epoch: [17][4750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.7176e-01 (7.1495e-01)\n",
      "Epoch: [17][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7871e-01 (7.1503e-01)\n",
      "Epoch: [17][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4227e-01 (7.1520e-01)\n",
      "Epoch: [17][4900/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.6081e-01 (7.1520e-01)\n",
      "Epoch: [17][4950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.8071e-01 (7.1541e-01)\n",
      "Epoch: [17][5000/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.7324e-01 (7.1516e-01)\n",
      "Test: [  0/196]\tTime  3.596 ( 3.596)\tLoss 5.1388e-01 (5.1388e-01)\tAcc@1  85.55 ( 85.55)\tAcc@5  98.05 ( 98.05)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.495)\tLoss 4.1267e-01 (6.9626e-01)\tAcc@1  89.84 ( 81.36)\tAcc@5  98.05 ( 95.80)\n",
      "Test: [100/196]\tTime  0.434 ( 0.465)\tLoss 1.1915e+00 (8.2356e-01)\tAcc@1  65.62 ( 78.42)\tAcc@5  90.62 ( 94.49)\n",
      "Test: [150/196]\tTime  0.434 ( 0.455)\tLoss 1.1592e+00 (9.3556e-01)\tAcc@1  76.56 ( 76.28)\tAcc@5  89.84 ( 93.06)\n",
      "epoch 17 0.7151422114011795 75.29999542236328 0.0010000000000000005 4688969 0.19999437845311407\n",
      "Epoch: [18][   0/5005]\tTime  3.004 ( 3.004)\tData  2.364 ( 2.364)\tLoss 8.9545e-01 (8.9545e-01)\n",
      "Epoch: [18][  50/5005]\tTime  0.636 ( 0.684)\tData  0.000 ( 0.047)\tLoss 5.9012e-01 (7.1200e-01)\n",
      "Epoch: [18][ 100/5005]\tTime  0.637 ( 0.661)\tData  0.000 ( 0.024)\tLoss 6.3565e-01 (7.1567e-01)\n",
      "Epoch: [18][ 150/5005]\tTime  0.637 ( 0.653)\tData  0.000 ( 0.016)\tLoss 7.6672e-01 (7.0717e-01)\n",
      "Epoch: [18][ 200/5005]\tTime  0.638 ( 0.649)\tData  0.000 ( 0.012)\tLoss 7.1696e-01 (7.0345e-01)\n",
      "Epoch: [18][ 250/5005]\tTime  0.637 ( 0.647)\tData  0.000 ( 0.010)\tLoss 6.3753e-01 (6.9795e-01)\n",
      "Epoch: [18][ 300/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.008)\tLoss 7.1876e-01 (6.9814e-01)\n",
      "Epoch: [18][ 350/5005]\tTime  0.637 ( 0.644)\tData  0.000 ( 0.007)\tLoss 6.4557e-01 (6.9793e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [18][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.006)\tLoss 6.8891e-01 (6.9884e-01)\n",
      "Epoch: [18][ 450/5005]\tTime  0.636 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.1983e-01 (6.9980e-01)\n",
      "Epoch: [18][ 500/5005]\tTime  0.636 ( 0.642)\tData  0.000 ( 0.005)\tLoss 6.8125e-01 (7.0036e-01)\n",
      "Epoch: [18][ 550/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.005)\tLoss 6.2714e-01 (6.9946e-01)\n",
      "Epoch: [18][ 600/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 5.8292e-01 (6.9743e-01)\n",
      "Epoch: [18][ 650/5005]\tTime  0.637 ( 0.641)\tData  0.000 ( 0.004)\tLoss 6.0997e-01 (6.9823e-01)\n",
      "Epoch: [18][ 700/5005]\tTime  0.636 ( 0.640)\tData  0.000 ( 0.004)\tLoss 6.0047e-01 (6.9731e-01)\n",
      "Epoch: [18][ 750/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.9994e-01 (6.9688e-01)\n",
      "Epoch: [18][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.2709e-01 (6.9649e-01)\n",
      "Epoch: [18][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.6085e-01 (6.9571e-01)\n",
      "Epoch: [18][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.4206e-01 (6.9420e-01)\n",
      "Epoch: [18][ 950/5005]\tTime  0.638 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.2038e-01 (6.9490e-01)\n",
      "Epoch: [18][1000/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.003)\tLoss 6.1596e-01 (6.9322e-01)\n",
      "Epoch: [18][1050/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.3776e-01 (6.9265e-01)\n",
      "Epoch: [18][1100/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.7472e-01 (6.9199e-01)\n",
      "Epoch: [18][1150/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.3668e-01 (6.9201e-01)\n",
      "Epoch: [18][1200/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.2375e-01 (6.9218e-01)\n",
      "Epoch: [18][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 5.9238e-01 (6.9245e-01)\n",
      "Epoch: [18][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.5137e-01 (6.9254e-01)\n",
      "Epoch: [18][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.3114e-01 (6.9168e-01)\n",
      "Epoch: [18][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.7246e-01 (6.9189e-01)\n",
      "Epoch: [18][1450/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.2460e-01 (6.9184e-01)\n",
      "Epoch: [18][1500/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 8.7416e-01 (6.9186e-01)\n",
      "Epoch: [18][1550/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.7133e-01 (6.9144e-01)\n",
      "Epoch: [18][1600/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.1570e-01 (6.9126e-01)\n",
      "Epoch: [18][1650/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.4400e-01 (6.9175e-01)\n",
      "Epoch: [18][1700/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.5628e-01 (6.9203e-01)\n",
      "Epoch: [18][1750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 8.4492e-01 (6.9183e-01)\n",
      "Epoch: [18][1800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.5083e-01 (6.9130e-01)\n",
      "Epoch: [18][1850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.3241e-01 (6.9132e-01)\n",
      "Epoch: [18][1900/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.4702e-01 (6.9164e-01)\n",
      "Epoch: [18][1950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5178e-01 (6.9179e-01)\n",
      "Epoch: [18][2000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.0508e-01 (6.9146e-01)\n",
      "Epoch: [18][2050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.6944e-01 (6.9154e-01)\n",
      "Epoch: [18][2100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.3197e-01 (6.9189e-01)\n",
      "Epoch: [18][2150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.4379e-01 (6.9139e-01)\n",
      "Epoch: [18][2200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.1600e-01 (6.9198e-01)\n",
      "Epoch: [18][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.8261e-01 (6.9220e-01)\n",
      "Epoch: [18][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0186e-01 (6.9219e-01)\n",
      "Epoch: [18][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7002e-01 (6.9258e-01)\n",
      "Epoch: [18][2400/5005]\tTime  0.639 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2755e-01 (6.9253e-01)\n",
      "Epoch: [18][2450/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 9.2197e-01 (6.9238e-01)\n",
      "Epoch: [18][2500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.4335e-01 (6.9258e-01)\n",
      "Epoch: [18][2550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.4263e-01 (6.9216e-01)\n",
      "Epoch: [18][2600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7924e-01 (6.9203e-01)\n",
      "Epoch: [18][2650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.6970e-01 (6.9175e-01)\n",
      "Epoch: [18][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9010e-01 (6.9180e-01)\n",
      "Epoch: [18][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2044e-01 (6.9175e-01)\n",
      "Epoch: [18][2800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5744e-01 (6.9181e-01)\n",
      "Epoch: [18][2850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2440e-01 (6.9159e-01)\n",
      "Epoch: [18][2900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.7857e-01 (6.9172e-01)\n",
      "Epoch: [18][2950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8454e-01 (6.9168e-01)\n",
      "Epoch: [18][3000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1544e-01 (6.9193e-01)\n",
      "Epoch: [18][3050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8203e-01 (6.9183e-01)\n",
      "Epoch: [18][3100/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.0663e-01 (6.9175e-01)\n",
      "Epoch: [18][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8679e-01 (6.9194e-01)\n",
      "Epoch: [18][3200/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.7509e-01 (6.9191e-01)\n",
      "Epoch: [18][3250/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9760e-01 (6.9170e-01)\n",
      "Epoch: [18][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2084e-01 (6.9143e-01)\n",
      "Epoch: [18][3350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8998e-01 (6.9165e-01)\n",
      "Epoch: [18][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5682e-01 (6.9147e-01)\n",
      "Epoch: [18][3450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2854e-01 (6.9139e-01)\n",
      "Epoch: [18][3500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.2720e-01 (6.9182e-01)\n",
      "Epoch: [18][3550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1652e-01 (6.9175e-01)\n",
      "Epoch: [18][3600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2173e-01 (6.9147e-01)\n",
      "Epoch: [18][3650/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.8788e-01 (6.9160e-01)\n",
      "Epoch: [18][3700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8346e-01 (6.9170e-01)\n",
      "Epoch: [18][3750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8489e-01 (6.9194e-01)\n",
      "Epoch: [18][3800/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.2206e-01 (6.9180e-01)\n",
      "Epoch: [18][3850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.4489e-01 (6.9191e-01)\n",
      "Epoch: [18][3900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9931e-01 (6.9197e-01)\n",
      "Epoch: [18][3950/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.1325e-01 (6.9213e-01)\n",
      "Epoch: [18][4000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9382e-01 (6.9228e-01)\n",
      "Epoch: [18][4050/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.4816e-01 (6.9250e-01)\n",
      "Epoch: [18][4100/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.2180e-01 (6.9248e-01)\n",
      "Epoch: [18][4150/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.7819e-01 (6.9266e-01)\n",
      "Epoch: [18][4200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8085e-01 (6.9272e-01)\n",
      "Epoch: [18][4250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5632e-01 (6.9300e-01)\n",
      "Epoch: [18][4300/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1649e-01 (6.9305e-01)\n",
      "Epoch: [18][4350/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7169e-01 (6.9308e-01)\n",
      "Epoch: [18][4400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.8064e-01 (6.9321e-01)\n",
      "Epoch: [18][4450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1367e-01 (6.9308e-01)\n",
      "Epoch: [18][4500/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0435e-01 (6.9329e-01)\n",
      "Epoch: [18][4550/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5580e-01 (6.9336e-01)\n",
      "Epoch: [18][4600/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.1871e-01 (6.9349e-01)\n",
      "Epoch: [18][4650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5180e-01 (6.9340e-01)\n",
      "Epoch: [18][4700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 8.1462e-01 (6.9341e-01)\n",
      "Epoch: [18][4750/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7360e-01 (6.9339e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [18][4800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.7723e-01 (6.9339e-01)\n",
      "Epoch: [18][4850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.1676e-01 (6.9354e-01)\n",
      "Epoch: [18][4900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7415e-01 (6.9372e-01)\n",
      "Epoch: [18][4950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.4615e-01 (6.9368e-01)\n",
      "Epoch: [18][5000/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8228e-01 (6.9366e-01)\n",
      "Test: [  0/196]\tTime  3.486 ( 3.486)\tLoss 5.2796e-01 (5.2796e-01)\tAcc@1  83.59 ( 83.59)\tAcc@5  98.05 ( 98.05)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.493)\tLoss 4.0284e-01 (6.8708e-01)\tAcc@1  90.62 ( 81.84)\tAcc@5  98.05 ( 95.83)\n",
      "Test: [100/196]\tTime  0.434 ( 0.464)\tLoss 1.1969e+00 (8.1818e-01)\tAcc@1  66.80 ( 78.68)\tAcc@5  90.62 ( 94.58)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.1284e+00 (9.2851e-01)\tAcc@1  77.73 ( 76.53)\tAcc@5  89.84 ( 93.21)\n",
      "epoch 18 0.6936557817617771 75.56600189208984 0.0005000000000000008 4688969 0.19999437845311407\n",
      "Epoch: [19][   0/5005]\tTime  3.262 ( 3.262)\tData  2.622 ( 2.622)\tLoss 7.4651e-01 (7.4651e-01)\n",
      "Epoch: [19][  50/5005]\tTime  0.642 ( 0.689)\tData  0.000 ( 0.052)\tLoss 6.9184e-01 (6.6590e-01)\n",
      "Epoch: [19][ 100/5005]\tTime  0.636 ( 0.664)\tData  0.000 ( 0.026)\tLoss 6.8987e-01 (6.7953e-01)\n",
      "Epoch: [19][ 150/5005]\tTime  0.636 ( 0.655)\tData  0.000 ( 0.018)\tLoss 5.3078e-01 (6.6708e-01)\n",
      "Epoch: [19][ 200/5005]\tTime  0.636 ( 0.650)\tData  0.000 ( 0.013)\tLoss 6.5215e-01 (6.7079e-01)\n",
      "Epoch: [19][ 250/5005]\tTime  0.636 ( 0.647)\tData  0.000 ( 0.011)\tLoss 5.9170e-01 (6.7520e-01)\n",
      "Epoch: [19][ 300/5005]\tTime  0.637 ( 0.645)\tData  0.000 ( 0.009)\tLoss 6.6539e-01 (6.7227e-01)\n",
      "Epoch: [19][ 350/5005]\tTime  0.638 ( 0.644)\tData  0.000 ( 0.008)\tLoss 5.0646e-01 (6.7296e-01)\n",
      "Epoch: [19][ 400/5005]\tTime  0.637 ( 0.643)\tData  0.000 ( 0.007)\tLoss 8.0088e-01 (6.7490e-01)\n",
      "Epoch: [19][ 450/5005]\tTime  0.638 ( 0.643)\tData  0.000 ( 0.006)\tLoss 7.4514e-01 (6.7736e-01)\n",
      "Epoch: [19][ 500/5005]\tTime  0.637 ( 0.642)\tData  0.000 ( 0.005)\tLoss 6.8611e-01 (6.7667e-01)\n",
      "Epoch: [19][ 550/5005]\tTime  0.638 ( 0.642)\tData  0.000 ( 0.005)\tLoss 7.2186e-01 (6.7524e-01)\n",
      "Epoch: [19][ 600/5005]\tTime  0.636 ( 0.641)\tData  0.000 ( 0.005)\tLoss 6.6920e-01 (6.7486e-01)\n",
      "Epoch: [19][ 650/5005]\tTime  0.636 ( 0.641)\tData  0.000 ( 0.004)\tLoss 5.9740e-01 (6.7425e-01)\n",
      "Epoch: [19][ 700/5005]\tTime  0.636 ( 0.641)\tData  0.000 ( 0.004)\tLoss 6.7676e-01 (6.7481e-01)\n",
      "Epoch: [19][ 750/5005]\tTime  0.636 ( 0.640)\tData  0.000 ( 0.004)\tLoss 6.4000e-01 (6.7443e-01)\n",
      "Epoch: [19][ 800/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 6.1743e-01 (6.7524e-01)\n",
      "Epoch: [19][ 850/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 7.3481e-01 (6.7563e-01)\n",
      "Epoch: [19][ 900/5005]\tTime  0.637 ( 0.640)\tData  0.001 ( 0.003)\tLoss 5.6388e-01 (6.7485e-01)\n",
      "Epoch: [19][ 950/5005]\tTime  0.637 ( 0.640)\tData  0.000 ( 0.003)\tLoss 5.7301e-01 (6.7460e-01)\n",
      "Epoch: [19][1000/5005]\tTime  0.639 ( 0.639)\tData  0.000 ( 0.003)\tLoss 7.1447e-01 (6.7478e-01)\n",
      "Epoch: [19][1050/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.003)\tLoss 6.4933e-01 (6.7494e-01)\n",
      "Epoch: [19][1100/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.003)\tLoss 8.1672e-01 (6.7568e-01)\n",
      "Epoch: [19][1150/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.0007e-01 (6.7505e-01)\n",
      "Epoch: [19][1200/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.6551e-01 (6.7504e-01)\n",
      "Epoch: [19][1250/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.1931e-01 (6.7530e-01)\n",
      "Epoch: [19][1300/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 5.7795e-01 (6.7475e-01)\n",
      "Epoch: [19][1350/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.4948e-01 (6.7481e-01)\n",
      "Epoch: [19][1400/5005]\tTime  0.637 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.5619e-01 (6.7419e-01)\n",
      "Epoch: [19][1450/5005]\tTime  0.636 ( 0.639)\tData  0.000 ( 0.002)\tLoss 6.2151e-01 (6.7409e-01)\n",
      "Epoch: [19][1500/5005]\tTime  0.638 ( 0.639)\tData  0.000 ( 0.002)\tLoss 7.5110e-01 (6.7350e-01)\n",
      "Epoch: [19][1550/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.002)\tLoss 5.8034e-01 (6.7291e-01)\n",
      "Epoch: [19][1600/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.9438e-01 (6.7275e-01)\n",
      "Epoch: [19][1650/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.002)\tLoss 4.8262e-01 (6.7350e-01)\n",
      "Epoch: [19][1700/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.2066e-01 (6.7335e-01)\n",
      "Epoch: [19][1750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 8.4664e-01 (6.7381e-01)\n",
      "Epoch: [19][1800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 6.6365e-01 (6.7347e-01)\n",
      "Epoch: [19][1850/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.1881e-01 (6.7394e-01)\n",
      "Epoch: [19][1900/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.1977e-01 (6.7405e-01)\n",
      "Epoch: [19][1950/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.002)\tLoss 7.7597e-01 (6.7399e-01)\n",
      "Epoch: [19][2000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.002)\tLoss 5.6391e-01 (6.7419e-01)\n",
      "Epoch: [19][2050/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9163e-01 (6.7432e-01)\n",
      "Epoch: [19][2100/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.4560e-01 (6.7425e-01)\n",
      "Epoch: [19][2150/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3855e-01 (6.7394e-01)\n",
      "Epoch: [19][2200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.7740e-01 (6.7412e-01)\n",
      "Epoch: [19][2250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5674e-01 (6.7455e-01)\n",
      "Epoch: [19][2300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.6724e-01 (6.7444e-01)\n",
      "Epoch: [19][2350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5548e-01 (6.7487e-01)\n",
      "Epoch: [19][2400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3798e-01 (6.7502e-01)\n",
      "Epoch: [19][2450/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.5615e-01 (6.7500e-01)\n",
      "Epoch: [19][2500/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7952e-01 (6.7508e-01)\n",
      "Epoch: [19][2550/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.2680e-01 (6.7538e-01)\n",
      "Epoch: [19][2600/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5993e-01 (6.7554e-01)\n",
      "Epoch: [19][2650/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.2217e-01 (6.7556e-01)\n",
      "Epoch: [19][2700/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.3491e-01 (6.7586e-01)\n",
      "Epoch: [19][2750/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.8710e-01 (6.7558e-01)\n",
      "Epoch: [19][2800/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 4.8324e-01 (6.7560e-01)\n",
      "Epoch: [19][2850/5005]\tTime  0.638 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.8769e-01 (6.7561e-01)\n",
      "Epoch: [19][2900/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 4.9978e-01 (6.7535e-01)\n",
      "Epoch: [19][2950/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0552e-01 (6.7534e-01)\n",
      "Epoch: [19][3000/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.2411e-01 (6.7544e-01)\n",
      "Epoch: [19][3050/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5568e-01 (6.7559e-01)\n",
      "Epoch: [19][3100/5005]\tTime  0.635 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.5946e-01 (6.7562e-01)\n",
      "Epoch: [19][3150/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 5.2156e-01 (6.7569e-01)\n",
      "Epoch: [19][3200/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.9243e-01 (6.7610e-01)\n",
      "Epoch: [19][3250/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.7664e-01 (6.7602e-01)\n",
      "Epoch: [19][3300/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.9339e-01 (6.7603e-01)\n",
      "Epoch: [19][3350/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.0559e-01 (6.7612e-01)\n",
      "Epoch: [19][3400/5005]\tTime  0.637 ( 0.638)\tData  0.000 ( 0.001)\tLoss 7.8642e-01 (6.7656e-01)\n",
      "Epoch: [19][3450/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.4212e-01 (6.7680e-01)\n",
      "Epoch: [19][3500/5005]\tTime  0.636 ( 0.638)\tData  0.000 ( 0.001)\tLoss 6.5879e-01 (6.7678e-01)\n",
      "Epoch: [19][3550/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.0915e-01 (6.7699e-01)\n",
      "Epoch: [19][3600/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.2696e-01 (6.7734e-01)\n",
      "Epoch: [19][3650/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.2961e-01 (6.7699e-01)\n",
      "Epoch: [19][3700/5005]\tTime  0.639 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.9424e-01 (6.7711e-01)\n",
      "Epoch: [19][3750/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 5.7106e-01 (6.7709e-01)\n",
      "Epoch: [19][3800/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.5360e-01 (6.7707e-01)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: [19][3850/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.5847e-01 (6.7714e-01)\n",
      "Epoch: [19][3900/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.1058e-01 (6.7694e-01)\n",
      "Epoch: [19][3950/5005]\tTime  0.635 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.7047e-01 (6.7679e-01)\n",
      "Epoch: [19][4000/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.0957e-01 (6.7657e-01)\n",
      "Epoch: [19][4050/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.2115e-01 (6.7661e-01)\n",
      "Epoch: [19][4100/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.4113e-01 (6.7662e-01)\n",
      "Epoch: [19][4150/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.9383e-01 (6.7668e-01)\n",
      "Epoch: [19][4200/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 5.4988e-01 (6.7670e-01)\n",
      "Epoch: [19][4250/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.0203e-01 (6.7641e-01)\n",
      "Epoch: [19][4300/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.7986e-01 (6.7651e-01)\n",
      "Epoch: [19][4350/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.7321e-01 (6.7643e-01)\n",
      "Epoch: [19][4400/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.3960e-01 (6.7630e-01)\n",
      "Epoch: [19][4450/5005]\tTime  0.635 ( 0.637)\tData  0.000 ( 0.001)\tLoss 8.0119e-01 (6.7595e-01)\n",
      "Epoch: [19][4500/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.6729e-01 (6.7593e-01)\n",
      "Epoch: [19][4550/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.3188e-01 (6.7587e-01)\n",
      "Epoch: [19][4600/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.9895e-01 (6.7630e-01)\n",
      "Epoch: [19][4650/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.3567e-01 (6.7660e-01)\n",
      "Epoch: [19][4700/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.1595e-01 (6.7646e-01)\n",
      "Epoch: [19][4750/5005]\tTime  0.638 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.9939e-01 (6.7648e-01)\n",
      "Epoch: [19][4800/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.8745e-01 (6.7629e-01)\n",
      "Epoch: [19][4850/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 5.3331e-01 (6.7628e-01)\n",
      "Epoch: [19][4900/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.7081e-01 (6.7593e-01)\n",
      "Epoch: [19][4950/5005]\tTime  0.636 ( 0.637)\tData  0.000 ( 0.001)\tLoss 7.6700e-01 (6.7589e-01)\n",
      "Epoch: [19][5000/5005]\tTime  0.637 ( 0.637)\tData  0.000 ( 0.001)\tLoss 6.0560e-01 (6.7569e-01)\n",
      "Test: [  0/196]\tTime  3.495 ( 3.495)\tLoss 4.9949e-01 (4.9949e-01)\tAcc@1  86.72 ( 86.72)\tAcc@5  97.66 ( 97.66)\n",
      "Test: [ 50/196]\tTime  0.434 ( 0.493)\tLoss 4.0775e-01 (6.8128e-01)\tAcc@1  89.84 ( 82.02)\tAcc@5  98.05 ( 95.87)\n",
      "Test: [100/196]\tTime  0.434 ( 0.464)\tLoss 1.2018e+00 (8.0674e-01)\tAcc@1  65.23 ( 78.89)\tAcc@5  91.41 ( 94.65)\n",
      "Test: [150/196]\tTime  0.434 ( 0.454)\tLoss 1.1127e+00 (9.1676e-01)\tAcc@1  77.34 ( 76.63)\tAcc@5  90.23 ( 93.28)\n",
      "epoch 19 0.6756833399803545 75.7760009765625 0.0 4688969 0.19999437845311407\n",
      "acc 75.7760009765625\n"
     ]
    }
   ],
   "source": [
    "def get_res(epochs=20):\n",
    "    from torchvision.models import resnet50\n",
    "    model = resnet50(pretrained=True)\n",
    "    model.cuda()\n",
    "    criterion_val = nn.CrossEntropyLoss()\n",
    "    #acc1 = validate(val_loader, model, criterion_val).item()\n",
    "    #print(\"dense acc\", acc1)\n",
    "    \n",
    "    total_params = 0\n",
    "    for n, m in model.named_modules():\n",
    "        if type(m) == nn.Conv2d and m.weight.shape[1] > 3:\n",
    "            total_params += m.weight.numel()\n",
    "    print(\"tot\", total_params)\n",
    "    \n",
    "    model = run_dsp(model)\n",
    "    \n",
    "    \n",
    "    #optimizer = torch.optim.AdamW(model.parameters(), 0.001)\n",
    "    opt0 = torch.optim.SGD(model.parameters(), 0.0, momentum=0.9, nesterov=True, weight_decay=1e-4)\n",
    "    optimizer = torch.optim.SGD(model.parameters(), 0.01, momentum=0.9, nesterov=True, weight_decay=1e-4)\n",
    "    #scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2], gamma=0.1)\n",
    "    scheduler = torch.optim.lr_scheduler.PolynomialLR(optimizer, total_iters=epochs, power=1)\n",
    "    #scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, 0.004, epochs, cycle_momentum=False)\n",
    "    criterion = nn.CrossEntropyLoss()#SoftTargetCrossEntropy()\n",
    "    criterion_val = nn.CrossEntropyLoss()\n",
    "    scaler = torch.cuda.amp.GradScaler(enabled=True)\n",
    "    \n",
    "    best_acc1 = 0\n",
    "    \n",
    "    print(model, file=sys.stderr)\n",
    "    \n",
    "    acc1 = validate(val_loader, model, criterion_val).item()\n",
    "    print(\"start acc no bn\", acc1)\n",
    "    train_loss = train(train_loader, model, criterion, opt0, scaler, -1)\n",
    "    acc1 = validate(val_loader, model, criterion_val).item()\n",
    "    total_active = 0\n",
    "    for n, m in model.named_modules():\n",
    "        if type(m) == nn.Conv2d and m.weight.shape[1] > 3 and (\"conv2b\" not in n and \"conv1b\" not in n and \"sb\" not in n and \"conv3b\" not in n):\n",
    "            total_active += (m.weight != 0).sum().item()\n",
    "    print(\"start acc bn\", acc1, total_active)\n",
    "\n",
    "    for epoch in range(epochs):\n",
    "        train_loss = train(train_loader, model, criterion, optimizer, scaler, epoch)\n",
    "        acc1 = validate(val_loader, model, criterion_val).item()\n",
    "        scheduler.step()\n",
    "        \n",
    "        # remember best acc@1 and save checkpoint\n",
    "        is_best = acc1 > best_acc1\n",
    "        best_acc1 = max(acc1, best_acc1)\n",
    "        total_active = 0\n",
    "        for n, m in model.named_modules():\n",
    "            if type(m) == nn.Conv2d and m.weight.shape[1] > 3 and (\"conv2b\" not in n and \"conv1b\" not in n and \"sb\" not in n and \"conv3b\" not in n):\n",
    "                total_active += (m.weight != 0).sum().item()\n",
    "\n",
    "        print(\"epoch\", epoch, train_loss, acc1, optimizer.param_groups[0]['lr'], total_active, total_active / total_params)\n",
    "    \n",
    "    return acc1, copy.deepcopy(model.state_dict())\n",
    "\n",
    "acc, end = get_res()\n",
    "\n",
    "print(\"acc\", acc)"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
