| distributed init (rank 0): env://, gpu 0
| distributed init (rank 2): env://, gpu 2
| distributed init (rank 3): env://, gpu 3
| distributed init (rank 1): env://, gpu 1
Namespace(batch_size=128, epochs=300, update_freq=4, model='small', drop_path=0, input_size=224, layer_scale_init_value=1e-06, model_ema=False, model_ema_decay=0.9999, model_ema_force_cpu=False, model_ema_eval=False, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=5.0, momentum=0.9, weight_decay=0.05, weight_decay_end=None, lr=0.004, layer_decay=1.0, min_lr=1e-06, warmup_epochs=20, warmup_steps=-1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', crop_pct=None, reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', finetune='', head_init_scale=1.0, model_key='model|module', model_prefix='', data_path='/dev/shm/imagenet', eval_data_path=None, nb_classes=1000, imagenet_default_mean_and_std=True, data_set='IMNET', output_dir='./checkpoint_small_4.5G', log_dir=None, device='cuda', seed=0, resume='', auto_resume=True, save_ckpt=True, save_ckpt_freq=1, save_ckpt_num=3, start_epoch=0, eval=False, dist_eval=True, disable_eval=False, num_workers=10, pin_mem=True, world_size=4, local_rank=-1, dist_on_itp=False, dist_url='env://', use_amp=True, enable_wandb=False, project='convnext', wandb_ckpt=False, rank=0, gpu=0, distributed=True, dist_backend='nccl')
Transform = 
RandomResizedCropAndInterpolation(size=(224, 224), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=bicubic)
RandomHorizontalFlip(p=0.5)
RandAugment(n=2, ops=
	AugmentOp(name=AutoContrast, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=Equalize, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=Invert, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=Rotate, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=PosterizeIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=SolarizeIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=SolarizeAdd, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=ColorIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=ContrastIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=BrightnessIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=SharpnessIncreasing, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=ShearX, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=ShearY, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=TranslateXRel, p=0.5, m=9, mstd=0.5)
	AugmentOp(name=TranslateYRel, p=0.5, m=9, mstd=0.5))
ToTensor()
Normalize(mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250]))
RandomErasing(p=0.25, mode=pixel, count=(1, 1))
---------------------------
reading from datapath /dev/shm/imagenet
Number of the class = 1000
Transform = 
Resize(size=256, interpolation=bicubic, max_size=None, antialias=True)
CenterCrop(size=(224, 224))
ToTensor()
Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
---------------------------
reading from datapath /dev/shm/imagenet
Number of the class = 1000
Sampler_train = <torch.utils.data.distributed.DistributedSampler object at 0x7f2388999150>
Mixup is activated!
Model = SFCNN(
  (first_conv): ConvX(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): Act(
      (act): SiLU(inplace=True)
    )
  )
  (layer1): Sequential(
    (0): DropBottleNeck(
      (ln): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
      (le): ConvX(
        (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=192, bias=False)
        (norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Sequential(
        (0): ConvX(
          (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=32, bias=False)
          (norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
        (1): ConvX(
          (conv): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
      )
      (drop_path): Identity()
    )
    (1): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
        (norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.004)
    )
    (2): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
        (norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.008)
    )
    (3): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
        (norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.012)
    )
    (4): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
        (norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.016)
    )
    (5): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(64, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
        (norm): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.020)
    )
  )
  (layer2): Sequential(
    (0): DropBottleNeck(
      (ln): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (le): ConvX(
        (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=384, bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Sequential(
        (0): ConvX(
          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=64, bias=False)
          (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
        (1): ConvX(
          (conv): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
      )
      (drop_path): DropPath(drop_prob=0.024)
    )
    (1): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.027)
    )
    (2): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.031)
    )
    (3): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.035)
    )
    (4): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.039)
    )
    (5): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.043)
    )
    (6): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.047)
    )
    (7): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.051)
    )
    (8): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.055)
    )
    (9): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.059)
    )
    (10): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.063)
    )
    (11): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
        (norm): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.067)
    )
  )
  (layer3): Sequential(
    (0): DropBottleNeck(
      (ln): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (le): ConvX(
        (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)
        (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(128, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Sequential(
        (0): ConvX(
          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=128, bias=False)
          (norm): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
        (1): ConvX(
          (conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
      )
      (drop_path): DropPath(drop_prob=0.071)
    )
    (1): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.075)
    )
    (2): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.078)
    )
    (3): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.082)
    )
    (4): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.086)
    )
    (5): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.090)
    )
    (6): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.094)
    )
    (7): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.098)
    )
    (8): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.102)
    )
    (9): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.106)
    )
    (10): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.110)
    )
    (11): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.114)
    )
    (12): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.118)
    )
    (13): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.122)
    )
    (14): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.125)
    )
    (15): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.129)
    )
    (16): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.133)
    )
    (17): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.137)
    )
    (18): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.141)
    )
    (19): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.145)
    )
    (20): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.149)
    )
    (21): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.153)
    )
    (22): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.157)
    )
    (23): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.161)
    )
    (24): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.165)
    )
    (25): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.169)
    )
    (26): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.173)
    )
    (27): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 768, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(768, 768, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=768, bias=False)
        (norm): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.176)
    )
  )
  (layer4): Sequential(
    (0): DropBottleNeck(
      (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (le): ConvX(
        (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)
        (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(256, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=1536, bias=False)
        (norm): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1536, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Sequential(
        (0): ConvX(
          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=256, bias=False)
          (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
        (1): ConvX(
          (conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (act): Act()
        )
      )
      (drop_path): DropPath(drop_prob=0.180)
    )
    (1): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1536, bias=False)
        (norm): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1536, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.184)
    )
    (2): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1536, bias=False)
        (norm): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1536, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.188)
    )
    (3): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1536, bias=False)
        (norm): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1536, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.192)
    )
    (4): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1536, bias=False)
        (norm): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1536, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.196)
    )
    (5): DropBottleNeck(
      (le): ConvX(
        (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512, bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (conv_in): ConvX(
        (conv): Conv2d(512, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): SiLU(inplace=True)
        )
      )
      (conv): ConvX(
        (conv): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1536, bias=False)
        (norm): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act(
          (act): GSiLU()
        )
      )
      (conv_out): ConvX(
        (conv): Conv2d(1536, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): Act()
      )
      (skip): Identity()
      (drop_path): DropPath(drop_prob=0.200)
    )
  )
  (head): ConvX(
    (conv): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): Act(
      (act): SiLU(inplace=True)
    )
  )
  (gap): AdaptiveAvgPool2d(output_size=1)
  (classifier): MlpHead(
    (fc1): Linear(in_features=1024, out_features=2048, bias=False)
    (norm): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act): Act(
      (act): SiLU(inplace=True)
    )
    (drop): Dropout(p=0.2, inplace=False)
    (fc2): Linear(in_features=2048, out_features=1000, bias=False)
  )
)
number of params: 26737888
LR = 0.00400000
Batch size = 2048
Update frequent = 4
Number of training examples = 1281167
Number of training training per epoch = 625
Param groups = {
  "decay": {
    "weight_decay": 0.05,
    "params": [
      "first_conv.conv.weight",
      "layer1.0.le.conv.weight",
      "layer1.0.conv_in.conv.weight",
      "layer1.0.conv.conv.weight",
      "layer1.0.conv_out.conv.weight",
      "layer1.0.skip.0.conv.weight",
      "layer1.0.skip.1.conv.weight",
      "layer1.1.le.conv.weight",
      "layer1.1.conv_in.conv.weight",
      "layer1.1.conv.conv.weight",
      "layer1.1.conv_out.conv.weight",
      "layer1.2.le.conv.weight",
      "layer1.2.conv_in.conv.weight",
      "layer1.2.conv.conv.weight",
      "layer1.2.conv_out.conv.weight",
      "layer1.3.le.conv.weight",
      "layer1.3.conv_in.conv.weight",
      "layer1.3.conv.conv.weight",
      "layer1.3.conv_out.conv.weight",
      "layer1.4.le.conv.weight",
      "layer1.4.conv_in.conv.weight",
      "layer1.4.conv.conv.weight",
      "layer1.4.conv_out.conv.weight",
      "layer1.5.le.conv.weight",
      "layer1.5.conv_in.conv.weight",
      "layer1.5.conv.conv.weight",
      "layer1.5.conv_out.conv.weight",
      "layer2.0.le.conv.weight",
      "layer2.0.conv_in.conv.weight",
      "layer2.0.conv.conv.weight",
      "layer2.0.conv_out.conv.weight",
      "layer2.0.skip.0.conv.weight",
      "layer2.0.skip.1.conv.weight",
      "layer2.1.le.conv.weight",
      "layer2.1.conv_in.conv.weight",
      "layer2.1.conv.conv.weight",
      "layer2.1.conv_out.conv.weight",
      "layer2.2.le.conv.weight",
      "layer2.2.conv_in.conv.weight",
      "layer2.2.conv.conv.weight",
      "layer2.2.conv_out.conv.weight",
      "layer2.3.le.conv.weight",
      "layer2.3.conv_in.conv.weight",
      "layer2.3.conv.conv.weight",
      "layer2.3.conv_out.conv.weight",
      "layer2.4.le.conv.weight",
      "layer2.4.conv_in.conv.weight",
      "layer2.4.conv.conv.weight",
      "layer2.4.conv_out.conv.weight",
      "layer2.5.le.conv.weight",
      "layer2.5.conv_in.conv.weight",
      "layer2.5.conv.conv.weight",
      "layer2.5.conv_out.conv.weight",
      "layer2.6.le.conv.weight",
      "layer2.6.conv_in.conv.weight",
      "layer2.6.conv.conv.weight",
      "layer2.6.conv_out.conv.weight",
      "layer2.7.le.conv.weight",
      "layer2.7.conv_in.conv.weight",
      "layer2.7.conv.conv.weight",
      "layer2.7.conv_out.conv.weight",
      "layer2.8.le.conv.weight",
      "layer2.8.conv_in.conv.weight",
      "layer2.8.conv.conv.weight",
      "layer2.8.conv_out.conv.weight",
      "layer2.9.le.conv.weight",
      "layer2.9.conv_in.conv.weight",
      "layer2.9.conv.conv.weight",
      "layer2.9.conv_out.conv.weight",
      "layer2.10.le.conv.weight",
      "layer2.10.conv_in.conv.weight",
      "layer2.10.conv.conv.weight",
      "layer2.10.conv_out.conv.weight",
      "layer2.11.le.conv.weight",
      "layer2.11.conv_in.conv.weight",
      "layer2.11.conv.conv.weight",
      "layer2.11.conv_out.conv.weight",
      "layer3.0.le.conv.weight",
      "layer3.0.conv_in.conv.weight",
      "layer3.0.conv.conv.weight",
      "layer3.0.conv_out.conv.weight",
      "layer3.0.skip.0.conv.weight",
      "layer3.0.skip.1.conv.weight",
      "layer3.1.le.conv.weight",
      "layer3.1.conv_in.conv.weight",
      "layer3.1.conv.conv.weight",
      "layer3.1.conv_out.conv.weight",
      "layer3.2.le.conv.weight",
      "layer3.2.conv_in.conv.weight",
      "layer3.2.conv.conv.weight",
      "layer3.2.conv_out.conv.weight",
      "layer3.3.le.conv.weight",
      "layer3.3.conv_in.conv.weight",
      "layer3.3.conv.conv.weight",
      "layer3.3.conv_out.conv.weight",
      "layer3.4.le.conv.weight",
      "layer3.4.conv_in.conv.weight",
      "layer3.4.conv.conv.weight",
      "layer3.4.conv_out.conv.weight",
      "layer3.5.le.conv.weight",
      "layer3.5.conv_in.conv.weight",
      "layer3.5.conv.conv.weight",
      "layer3.5.conv_out.conv.weight",
      "layer3.6.le.conv.weight",
      "layer3.6.conv_in.conv.weight",
      "layer3.6.conv.conv.weight",
      "layer3.6.conv_out.conv.weight",
      "layer3.7.le.conv.weight",
      "layer3.7.conv_in.conv.weight",
      "layer3.7.conv.conv.weight",
      "layer3.7.conv_out.conv.weight",
      "layer3.8.le.conv.weight",
      "layer3.8.conv_in.conv.weight",
      "layer3.8.conv.conv.weight",
      "layer3.8.conv_out.conv.weight",
      "layer3.9.le.conv.weight",
      "layer3.9.conv_in.conv.weight",
      "layer3.9.conv.conv.weight",
      "layer3.9.conv_out.conv.weight",
      "layer3.10.le.conv.weight",
      "layer3.10.conv_in.conv.weight",
      "layer3.10.conv.conv.weight",
      "layer3.10.conv_out.conv.weight",
      "layer3.11.le.conv.weight",
      "layer3.11.conv_in.conv.weight",
      "layer3.11.conv.conv.weight",
      "layer3.11.conv_out.conv.weight",
      "layer3.12.le.conv.weight",
      "layer3.12.conv_in.conv.weight",
      "layer3.12.conv.conv.weight",
      "layer3.12.conv_out.conv.weight",
      "layer3.13.le.conv.weight",
      "layer3.13.conv_in.conv.weight",
      "layer3.13.conv.conv.weight",
      "layer3.13.conv_out.conv.weight",
      "layer3.14.le.conv.weight",
      "layer3.14.conv_in.conv.weight",
      "layer3.14.conv.conv.weight",
      "layer3.14.conv_out.conv.weight",
      "layer3.15.le.conv.weight",
      "layer3.15.conv_in.conv.weight",
      "layer3.15.conv.conv.weight",
      "layer3.15.conv_out.conv.weight",
      "layer3.16.le.conv.weight",
      "layer3.16.conv_in.conv.weight",
      "layer3.16.conv.conv.weight",
      "layer3.16.conv_out.conv.weight",
      "layer3.17.le.conv.weight",
      "layer3.17.conv_in.conv.weight",
      "layer3.17.conv.conv.weight",
      "layer3.17.conv_out.conv.weight",
      "layer3.18.le.conv.weight",
      "layer3.18.conv_in.conv.weight",
      "layer3.18.conv.conv.weight",
      "layer3.18.conv_out.conv.weight",
      "layer3.19.le.conv.weight",
      "layer3.19.conv_in.conv.weight",
      "layer3.19.conv.conv.weight",
      "layer3.19.conv_out.conv.weight",
      "layer3.20.le.conv.weight",
      "layer3.20.conv_in.conv.weight",
      "layer3.20.conv.conv.weight",
      "layer3.20.conv_out.conv.weight",
      "layer3.21.le.conv.weight",
      "layer3.21.conv_in.conv.weight",
      "layer3.21.conv.conv.weight",
      "layer3.21.conv_out.conv.weight",
      "layer3.22.le.conv.weight",
      "layer3.22.conv_in.conv.weight",
      "layer3.22.conv.conv.weight",
      "layer3.22.conv_out.conv.weight",
      "layer3.23.le.conv.weight",
      "layer3.23.conv_in.conv.weight",
      "layer3.23.conv.conv.weight",
      "layer3.23.conv_out.conv.weight",
      "layer3.24.le.conv.weight",
      "layer3.24.conv_in.conv.weight",
      "layer3.24.conv.conv.weight",
      "layer3.24.conv_out.conv.weight",
      "layer3.25.le.conv.weight",
      "layer3.25.conv_in.conv.weight",
      "layer3.25.conv.conv.weight",
      "layer3.25.conv_out.conv.weight",
      "layer3.26.le.conv.weight",
      "layer3.26.conv_in.conv.weight",
      "layer3.26.conv.conv.weight",
      "layer3.26.conv_out.conv.weight",
      "layer3.27.le.conv.weight",
      "layer3.27.conv_in.conv.weight",
      "layer3.27.conv.conv.weight",
      "layer3.27.conv_out.conv.weight",
      "layer4.0.le.conv.weight",
      "layer4.0.conv_in.conv.weight",
      "layer4.0.conv.conv.weight",
      "layer4.0.conv_out.conv.weight",
      "layer4.0.skip.0.conv.weight",
      "layer4.0.skip.1.conv.weight",
      "layer4.1.le.conv.weight",
      "layer4.1.conv_in.conv.weight",
      "layer4.1.conv.conv.weight",
      "layer4.1.conv_out.conv.weight",
      "layer4.2.le.conv.weight",
      "layer4.2.conv_in.conv.weight",
      "layer4.2.conv.conv.weight",
      "layer4.2.conv_out.conv.weight",
      "layer4.3.le.conv.weight",
      "layer4.3.conv_in.conv.weight",
      "layer4.3.conv.conv.weight",
      "layer4.3.conv_out.conv.weight",
      "layer4.4.le.conv.weight",
      "layer4.4.conv_in.conv.weight",
      "layer4.4.conv.conv.weight",
      "layer4.4.conv_out.conv.weight",
      "layer4.5.le.conv.weight",
      "layer4.5.conv_in.conv.weight",
      "layer4.5.conv.conv.weight",
      "layer4.5.conv_out.conv.weight",
      "head.conv.weight",
      "classifier.fc1.weight",
      "classifier.fc2.weight"
    ],
    "lr_scale": 1.0
  },
  "no_decay": {
    "weight_decay": 0.0,
    "params": [
      "first_conv.norm.weight",
      "first_conv.norm.bias",
      "layer1.0.ln.weight",
      "layer1.0.ln.bias",
      "layer1.0.le.norm.weight",
      "layer1.0.le.norm.bias",
      "layer1.0.conv_in.norm.weight",
      "layer1.0.conv_in.norm.bias",
      "layer1.0.conv.norm.weight",
      "layer1.0.conv.norm.bias",
      "layer1.0.conv_out.norm.weight",
      "layer1.0.conv_out.norm.bias",
      "layer1.0.skip.0.norm.weight",
      "layer1.0.skip.0.norm.bias",
      "layer1.0.skip.1.norm.weight",
      "layer1.0.skip.1.norm.bias",
      "layer1.1.le.norm.weight",
      "layer1.1.le.norm.bias",
      "layer1.1.conv_in.norm.weight",
      "layer1.1.conv_in.norm.bias",
      "layer1.1.conv.norm.weight",
      "layer1.1.conv.norm.bias",
      "layer1.1.conv_out.norm.weight",
      "layer1.1.conv_out.norm.bias",
      "layer1.2.le.norm.weight",
      "layer1.2.le.norm.bias",
      "layer1.2.conv_in.norm.weight",
      "layer1.2.conv_in.norm.bias",
      "layer1.2.conv.norm.weight",
      "layer1.2.conv.norm.bias",
      "layer1.2.conv_out.norm.weight",
      "layer1.2.conv_out.norm.bias",
      "layer1.3.le.norm.weight",
      "layer1.3.le.norm.bias",
      "layer1.3.conv_in.norm.weight",
      "layer1.3.conv_in.norm.bias",
      "layer1.3.conv.norm.weight",
      "layer1.3.conv.norm.bias",
      "layer1.3.conv_out.norm.weight",
      "layer1.3.conv_out.norm.bias",
      "layer1.4.le.norm.weight",
      "layer1.4.le.norm.bias",
      "layer1.4.conv_in.norm.weight",
      "layer1.4.conv_in.norm.bias",
      "layer1.4.conv.norm.weight",
      "layer1.4.conv.norm.bias",
      "layer1.4.conv_out.norm.weight",
      "layer1.4.conv_out.norm.bias",
      "layer1.5.le.norm.weight",
      "layer1.5.le.norm.bias",
      "layer1.5.conv_in.norm.weight",
      "layer1.5.conv_in.norm.bias",
      "layer1.5.conv.norm.weight",
      "layer1.5.conv.norm.bias",
      "layer1.5.conv_out.norm.weight",
      "layer1.5.conv_out.norm.bias",
      "layer2.0.ln.weight",
      "layer2.0.ln.bias",
      "layer2.0.le.norm.weight",
      "layer2.0.le.norm.bias",
      "layer2.0.conv_in.norm.weight",
      "layer2.0.conv_in.norm.bias",
      "layer2.0.conv.norm.weight",
      "layer2.0.conv.norm.bias",
      "layer2.0.conv_out.norm.weight",
      "layer2.0.conv_out.norm.bias",
      "layer2.0.skip.0.norm.weight",
      "layer2.0.skip.0.norm.bias",
      "layer2.0.skip.1.norm.weight",
      "layer2.0.skip.1.norm.bias",
      "layer2.1.le.norm.weight",
      "layer2.1.le.norm.bias",
      "layer2.1.conv_in.norm.weight",
      "layer2.1.conv_in.norm.bias",
      "layer2.1.conv.norm.weight",
      "layer2.1.conv.norm.bias",
      "layer2.1.conv_out.norm.weight",
      "layer2.1.conv_out.norm.bias",
      "layer2.2.le.norm.weight",
      "layer2.2.le.norm.bias",
      "layer2.2.conv_in.norm.weight",
      "layer2.2.conv_in.norm.bias",
      "layer2.2.conv.norm.weight",
      "layer2.2.conv.norm.bias",
      "layer2.2.conv_out.norm.weight",
      "layer2.2.conv_out.norm.bias",
      "layer2.3.le.norm.weight",
      "layer2.3.le.norm.bias",
      "layer2.3.conv_in.norm.weight",
      "layer2.3.conv_in.norm.bias",
      "layer2.3.conv.norm.weight",
      "layer2.3.conv.norm.bias",
      "layer2.3.conv_out.norm.weight",
      "layer2.3.conv_out.norm.bias",
      "layer2.4.le.norm.weight",
      "layer2.4.le.norm.bias",
      "layer2.4.conv_in.norm.weight",
      "layer2.4.conv_in.norm.bias",
      "layer2.4.conv.norm.weight",
      "layer2.4.conv.norm.bias",
      "layer2.4.conv_out.norm.weight",
      "layer2.4.conv_out.norm.bias",
      "layer2.5.le.norm.weight",
      "layer2.5.le.norm.bias",
      "layer2.5.conv_in.norm.weight",
      "layer2.5.conv_in.norm.bias",
      "layer2.5.conv.norm.weight",
      "layer2.5.conv.norm.bias",
      "layer2.5.conv_out.norm.weight",
      "layer2.5.conv_out.norm.bias",
      "layer2.6.le.norm.weight",
      "layer2.6.le.norm.bias",
      "layer2.6.conv_in.norm.weight",
      "layer2.6.conv_in.norm.bias",
      "layer2.6.conv.norm.weight",
      "layer2.6.conv.norm.bias",
      "layer2.6.conv_out.norm.weight",
      "layer2.6.conv_out.norm.bias",
      "layer2.7.le.norm.weight",
      "layer2.7.le.norm.bias",
      "layer2.7.conv_in.norm.weight",
      "layer2.7.conv_in.norm.bias",
      "layer2.7.conv.norm.weight",
      "layer2.7.conv.norm.bias",
      "layer2.7.conv_out.norm.weight",
      "layer2.7.conv_out.norm.bias",
      "layer2.8.le.norm.weight",
      "layer2.8.le.norm.bias",
      "layer2.8.conv_in.norm.weight",
      "layer2.8.conv_in.norm.bias",
      "layer2.8.conv.norm.weight",
      "layer2.8.conv.norm.bias",
      "layer2.8.conv_out.norm.weight",
      "layer2.8.conv_out.norm.bias",
      "layer2.9.le.norm.weight",
      "layer2.9.le.norm.bias",
      "layer2.9.conv_in.norm.weight",
      "layer2.9.conv_in.norm.bias",
      "layer2.9.conv.norm.weight",
      "layer2.9.conv.norm.bias",
      "layer2.9.conv_out.norm.weight",
      "layer2.9.conv_out.norm.bias",
      "layer2.10.le.norm.weight",
      "layer2.10.le.norm.bias",
      "layer2.10.conv_in.norm.weight",
      "layer2.10.conv_in.norm.bias",
      "layer2.10.conv.norm.weight",
      "layer2.10.conv.norm.bias",
      "layer2.10.conv_out.norm.weight",
      "layer2.10.conv_out.norm.bias",
      "layer2.11.le.norm.weight",
      "layer2.11.le.norm.bias",
      "layer2.11.conv_in.norm.weight",
      "layer2.11.conv_in.norm.bias",
      "layer2.11.conv.norm.weight",
      "layer2.11.conv.norm.bias",
      "layer2.11.conv_out.norm.weight",
      "layer2.11.conv_out.norm.bias",
      "layer3.0.ln.weight",
      "layer3.0.ln.bias",
      "layer3.0.le.norm.weight",
      "layer3.0.le.norm.bias",
      "layer3.0.conv_in.norm.weight",
      "layer3.0.conv_in.norm.bias",
      "layer3.0.conv.norm.weight",
      "layer3.0.conv.norm.bias",
      "layer3.0.conv_out.norm.weight",
      "layer3.0.conv_out.norm.bias",
      "layer3.0.skip.0.norm.weight",
      "layer3.0.skip.0.norm.bias",
      "layer3.0.skip.1.norm.weight",
      "layer3.0.skip.1.norm.bias",
      "layer3.1.le.norm.weight",
      "layer3.1.le.norm.bias",
      "layer3.1.conv_in.norm.weight",
      "layer3.1.conv_in.norm.bias",
      "layer3.1.conv.norm.weight",
      "layer3.1.conv.norm.bias",
      "layer3.1.conv_out.norm.weight",
      "layer3.1.conv_out.norm.bias",
      "layer3.2.le.norm.weight",
      "layer3.2.le.norm.bias",
      "layer3.2.conv_in.norm.weight",
      "layer3.2.conv_in.norm.bias",
      "layer3.2.conv.norm.weight",
      "layer3.2.conv.norm.bias",
      "layer3.2.conv_out.norm.weight",
      "layer3.2.conv_out.norm.bias",
      "layer3.3.le.norm.weight",
      "layer3.3.le.norm.bias",
      "layer3.3.conv_in.norm.weight",
      "layer3.3.conv_in.norm.bias",
      "layer3.3.conv.norm.weight",
      "layer3.3.conv.norm.bias",
      "layer3.3.conv_out.norm.weight",
      "layer3.3.conv_out.norm.bias",
      "layer3.4.le.norm.weight",
      "layer3.4.le.norm.bias",
      "layer3.4.conv_in.norm.weight",
      "layer3.4.conv_in.norm.bias",
      "layer3.4.conv.norm.weight",
      "layer3.4.conv.norm.bias",
      "layer3.4.conv_out.norm.weight",
      "layer3.4.conv_out.norm.bias",
      "layer3.5.le.norm.weight",
      "layer3.5.le.norm.bias",
      "layer3.5.conv_in.norm.weight",
      "layer3.5.conv_in.norm.bias",
      "layer3.5.conv.norm.weight",
      "layer3.5.conv.norm.bias",
      "layer3.5.conv_out.norm.weight",
      "layer3.5.conv_out.norm.bias",
      "layer3.6.le.norm.weight",
      "layer3.6.le.norm.bias",
      "layer3.6.conv_in.norm.weight",
      "layer3.6.conv_in.norm.bias",
      "layer3.6.conv.norm.weight",
      "layer3.6.conv.norm.bias",
      "layer3.6.conv_out.norm.weight",
      "layer3.6.conv_out.norm.bias",
      "layer3.7.le.norm.weight",
      "layer3.7.le.norm.bias",
      "layer3.7.conv_in.norm.weight",
      "layer3.7.conv_in.norm.bias",
      "layer3.7.conv.norm.weight",
      "layer3.7.conv.norm.bias",
      "layer3.7.conv_out.norm.weight",
      "layer3.7.conv_out.norm.bias",
      "layer3.8.le.norm.weight",
      "layer3.8.le.norm.bias",
      "layer3.8.conv_in.norm.weight",
      "layer3.8.conv_in.norm.bias",
      "layer3.8.conv.norm.weight",
      "layer3.8.conv.norm.bias",
      "layer3.8.conv_out.norm.weight",
      "layer3.8.conv_out.norm.bias",
      "layer3.9.le.norm.weight",
      "layer3.9.le.norm.bias",
      "layer3.9.conv_in.norm.weight",
      "layer3.9.conv_in.norm.bias",
      "layer3.9.conv.norm.weight",
      "layer3.9.conv.norm.bias",
      "layer3.9.conv_out.norm.weight",
      "layer3.9.conv_out.norm.bias",
      "layer3.10.le.norm.weight",
      "layer3.10.le.norm.bias",
      "layer3.10.conv_in.norm.weight",
      "layer3.10.conv_in.norm.bias",
      "layer3.10.conv.norm.weight",
      "layer3.10.conv.norm.bias",
      "layer3.10.conv_out.norm.weight",
      "layer3.10.conv_out.norm.bias",
      "layer3.11.le.norm.weight",
      "layer3.11.le.norm.bias",
      "layer3.11.conv_in.norm.weight",
      "layer3.11.conv_in.norm.bias",
      "layer3.11.conv.norm.weight",
      "layer3.11.conv.norm.bias",
      "layer3.11.conv_out.norm.weight",
      "layer3.11.conv_out.norm.bias",
      "layer3.12.le.norm.weight",
      "layer3.12.le.norm.bias",
      "layer3.12.conv_in.norm.weight",
      "layer3.12.conv_in.norm.bias",
      "layer3.12.conv.norm.weight",
      "layer3.12.conv.norm.bias",
      "layer3.12.conv_out.norm.weight",
      "layer3.12.conv_out.norm.bias",
      "layer3.13.le.norm.weight",
      "layer3.13.le.norm.bias",
      "layer3.13.conv_in.norm.weight",
      "layer3.13.conv_in.norm.bias",
      "layer3.13.conv.norm.weight",
      "layer3.13.conv.norm.bias",
      "layer3.13.conv_out.norm.weight",
      "layer3.13.conv_out.norm.bias",
      "layer3.14.le.norm.weight",
      "layer3.14.le.norm.bias",
      "layer3.14.conv_in.norm.weight",
      "layer3.14.conv_in.norm.bias",
      "layer3.14.conv.norm.weight",
      "layer3.14.conv.norm.bias",
      "layer3.14.conv_out.norm.weight",
      "layer3.14.conv_out.norm.bias",
      "layer3.15.le.norm.weight",
      "layer3.15.le.norm.bias",
      "layer3.15.conv_in.norm.weight",
      "layer3.15.conv_in.norm.bias",
      "layer3.15.conv.norm.weight",
      "layer3.15.conv.norm.bias",
      "layer3.15.conv_out.norm.weight",
      "layer3.15.conv_out.norm.bias",
      "layer3.16.le.norm.weight",
      "layer3.16.le.norm.bias",
      "layer3.16.conv_in.norm.weight",
      "layer3.16.conv_in.norm.bias",
      "layer3.16.conv.norm.weight",
      "layer3.16.conv.norm.bias",
      "layer3.16.conv_out.norm.weight",
      "layer3.16.conv_out.norm.bias",
      "layer3.17.le.norm.weight",
      "layer3.17.le.norm.bias",
      "layer3.17.conv_in.norm.weight",
      "layer3.17.conv_in.norm.bias",
      "layer3.17.conv.norm.weight",
      "layer3.17.conv.norm.bias",
      "layer3.17.conv_out.norm.weight",
      "layer3.17.conv_out.norm.bias",
      "layer3.18.le.norm.weight",
      "layer3.18.le.norm.bias",
      "layer3.18.conv_in.norm.weight",
      "layer3.18.conv_in.norm.bias",
      "layer3.18.conv.norm.weight",
      "layer3.18.conv.norm.bias",
      "layer3.18.conv_out.norm.weight",
      "layer3.18.conv_out.norm.bias",
      "layer3.19.le.norm.weight",
      "layer3.19.le.norm.bias",
      "layer3.19.conv_in.norm.weight",
      "layer3.19.conv_in.norm.bias",
      "layer3.19.conv.norm.weight",
      "layer3.19.conv.norm.bias",
      "layer3.19.conv_out.norm.weight",
      "layer3.19.conv_out.norm.bias",
      "layer3.20.le.norm.weight",
      "layer3.20.le.norm.bias",
      "layer3.20.conv_in.norm.weight",
      "layer3.20.conv_in.norm.bias",
      "layer3.20.conv.norm.weight",
      "layer3.20.conv.norm.bias",
      "layer3.20.conv_out.norm.weight",
      "layer3.20.conv_out.norm.bias",
      "layer3.21.le.norm.weight",
      "layer3.21.le.norm.bias",
      "layer3.21.conv_in.norm.weight",
      "layer3.21.conv_in.norm.bias",
      "layer3.21.conv.norm.weight",
      "layer3.21.conv.norm.bias",
      "layer3.21.conv_out.norm.weight",
      "layer3.21.conv_out.norm.bias",
      "layer3.22.le.norm.weight",
      "layer3.22.le.norm.bias",
      "layer3.22.conv_in.norm.weight",
      "layer3.22.conv_in.norm.bias",
      "layer3.22.conv.norm.weight",
      "layer3.22.conv.norm.bias",
      "layer3.22.conv_out.norm.weight",
      "layer3.22.conv_out.norm.bias",
      "layer3.23.le.norm.weight",
      "layer3.23.le.norm.bias",
      "layer3.23.conv_in.norm.weight",
      "layer3.23.conv_in.norm.bias",
      "layer3.23.conv.norm.weight",
      "layer3.23.conv.norm.bias",
      "layer3.23.conv_out.norm.weight",
      "layer3.23.conv_out.norm.bias",
      "layer3.24.le.norm.weight",
      "layer3.24.le.norm.bias",
      "layer3.24.conv_in.norm.weight",
      "layer3.24.conv_in.norm.bias",
      "layer3.24.conv.norm.weight",
      "layer3.24.conv.norm.bias",
      "layer3.24.conv_out.norm.weight",
      "layer3.24.conv_out.norm.bias",
      "layer3.25.le.norm.weight",
      "layer3.25.le.norm.bias",
      "layer3.25.conv_in.norm.weight",
      "layer3.25.conv_in.norm.bias",
      "layer3.25.conv.norm.weight",
      "layer3.25.conv.norm.bias",
      "layer3.25.conv_out.norm.weight",
      "layer3.25.conv_out.norm.bias",
      "layer3.26.le.norm.weight",
      "layer3.26.le.norm.bias",
      "layer3.26.conv_in.norm.weight",
      "layer3.26.conv_in.norm.bias",
      "layer3.26.conv.norm.weight",
      "layer3.26.conv.norm.bias",
      "layer3.26.conv_out.norm.weight",
      "layer3.26.conv_out.norm.bias",
      "layer3.27.le.norm.weight",
      "layer3.27.le.norm.bias",
      "layer3.27.conv_in.norm.weight",
      "layer3.27.conv_in.norm.bias",
      "layer3.27.conv.norm.weight",
      "layer3.27.conv.norm.bias",
      "layer3.27.conv_out.norm.weight",
      "layer3.27.conv_out.norm.bias",
      "layer4.0.ln.weight",
      "layer4.0.ln.bias",
      "layer4.0.le.norm.weight",
      "layer4.0.le.norm.bias",
      "layer4.0.conv_in.norm.weight",
      "layer4.0.conv_in.norm.bias",
      "layer4.0.conv.norm.weight",
      "layer4.0.conv.norm.bias",
      "layer4.0.conv_out.norm.weight",
      "layer4.0.conv_out.norm.bias",
      "layer4.0.skip.0.norm.weight",
      "layer4.0.skip.0.norm.bias",
      "layer4.0.skip.1.norm.weight",
      "layer4.0.skip.1.norm.bias",
      "layer4.1.le.norm.weight",
      "layer4.1.le.norm.bias",
      "layer4.1.conv_in.norm.weight",
      "layer4.1.conv_in.norm.bias",
      "layer4.1.conv.norm.weight",
      "layer4.1.conv.norm.bias",
      "layer4.1.conv_out.norm.weight",
      "layer4.1.conv_out.norm.bias",
      "layer4.2.le.norm.weight",
      "layer4.2.le.norm.bias",
      "layer4.2.conv_in.norm.weight",
      "layer4.2.conv_in.norm.bias",
      "layer4.2.conv.norm.weight",
      "layer4.2.conv.norm.bias",
      "layer4.2.conv_out.norm.weight",
      "layer4.2.conv_out.norm.bias",
      "layer4.3.le.norm.weight",
      "layer4.3.le.norm.bias",
      "layer4.3.conv_in.norm.weight",
      "layer4.3.conv_in.norm.bias",
      "layer4.3.conv.norm.weight",
      "layer4.3.conv.norm.bias",
      "layer4.3.conv_out.norm.weight",
      "layer4.3.conv_out.norm.bias",
      "layer4.4.le.norm.weight",
      "layer4.4.le.norm.bias",
      "layer4.4.conv_in.norm.weight",
      "layer4.4.conv_in.norm.bias",
      "layer4.4.conv.norm.weight",
      "layer4.4.conv.norm.bias",
      "layer4.4.conv_out.norm.weight",
      "layer4.4.conv_out.norm.bias",
      "layer4.5.le.norm.weight",
      "layer4.5.le.norm.bias",
      "layer4.5.conv_in.norm.weight",
      "layer4.5.conv_in.norm.bias",
      "layer4.5.conv.norm.weight",
      "layer4.5.conv.norm.bias",
      "layer4.5.conv_out.norm.weight",
      "layer4.5.conv_out.norm.bias",
      "head.norm.weight",
      "head.norm.bias",
      "classifier.norm.weight",
      "classifier.norm.bias"
    ],
    "lr_scale": 1.0
  }
}
Use Cosine LR scheduler
Set warmup steps = 12500
Set warmup steps = 0
Max WD = 0.0500000, Min WD = 0.0500000
criterion = SoftTargetCrossEntropy()
Auto resume checkpoint: 
Start training for 300 epochs
Epoch: [0]  [   0/2502]  eta: 6:49:33  lr: 0.000000  min_lr: 0.000000  loss: 6.9740 (6.9740)  weight_decay: 0.0500 (0.0500)  time: 9.8214  data: 2.1180  max mem: 27255
Epoch: [0]  [ 200/2502]  eta: 0:14:46  lr: 0.000016  min_lr: 0.000016  loss: 6.9217 (6.9401)  weight_decay: 0.0500 (0.0500)  grad_norm: 8.7639 (12.1099)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [0]  [ 400/2502]  eta: 0:12:38  lr: 0.000032  min_lr: 0.000032  loss: 6.8637 (6.9115)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.2352 (8.1476)  time: 0.3363  data: 0.0004  max mem: 27255
Epoch: [0]  [ 600/2502]  eta: 0:11:09  lr: 0.000048  min_lr: 0.000048  loss: 6.7864 (6.8811)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.0128 (inf)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [0]  [ 800/2502]  eta: 0:09:52  lr: 0.000064  min_lr: 0.000064  loss: 6.7414 (6.8499)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.0007 (inf)  time: 0.3359  data: 0.0005  max mem: 27255
Epoch: [0]  [1000/2502]  eta: 0:08:39  lr: 0.000080  min_lr: 0.000080  loss: 6.6699 (6.8189)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.8718 (inf)  time: 0.3358  data: 0.0005  max mem: 27255
Epoch: [0]  [1200/2502]  eta: 0:07:28  lr: 0.000096  min_lr: 0.000096  loss: 6.6395 (6.7937)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.7564 (inf)  time: 0.3367  data: 0.0006  max mem: 27255
Epoch: [0]  [1400/2502]  eta: 0:06:18  lr: 0.000112  min_lr: 0.000112  loss: 6.6479 (6.7709)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.4330 (inf)  time: 0.3366  data: 0.0006  max mem: 27255
Epoch: [0]  [1600/2502]  eta: 0:05:08  lr: 0.000128  min_lr: 0.000128  loss: 6.6092 (6.7493)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.4914 (inf)  time: 0.3360  data: 0.0005  max mem: 27255
Epoch: [0]  [1800/2502]  eta: 0:03:59  lr: 0.000144  min_lr: 0.000144  loss: 6.5646 (6.7274)  weight_decay: 0.0500 (0.0500)  grad_norm: 8.4951 (inf)  time: 0.3371  data: 0.0005  max mem: 27255
Epoch: [0]  [2000/2502]  eta: 0:02:51  lr: 0.000160  min_lr: 0.000160  loss: 6.4806 (6.7047)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.0725 (inf)  time: 0.3357  data: 0.0005  max mem: 27255
Epoch: [0]  [2200/2502]  eta: 0:01:42  lr: 0.000176  min_lr: 0.000176  loss: 6.5083 (6.6873)  weight_decay: 0.0500 (0.0500)  grad_norm: 8.4691 (inf)  time: 0.3363  data: 0.0004  max mem: 27255
Epoch: [0]  [2400/2502]  eta: 0:00:34  lr: 0.000192  min_lr: 0.000192  loss: 6.4334 (6.6685)  weight_decay: 0.0500 (0.0500)  grad_norm: 9.5190 (inf)  time: 0.3359  data: 0.0005  max mem: 27255
Epoch: [0]  [2501/2502]  eta: 0:00:00  lr: 0.000200  min_lr: 0.000200  loss: 6.4448 (6.6599)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.8725 (inf)  time: 0.3019  data: 0.0007  max mem: 27255
Epoch: [0] Total time: 0:14:10 (0.3400 s / it)
Averaged stats: lr: 0.000200  min_lr: 0.000200  loss: 6.4448 (6.6619)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.8725 (inf)
Test:  [ 0/50]  eta: 0:06:25  loss: 5.3142 (5.3142)  acc1: 5.2000 (5.2000)  acc5: 21.6000 (21.6000)  time: 7.7121  data: 3.2126  max mem: 27255
Test:  [10/50]  eta: 0:00:33  loss: 5.5270 (5.5473)  acc1: 3.6000 (4.8364)  acc5: 12.8000 (14.7636)  time: 0.8499  data: 0.2923  max mem: 27255
Test:  [20/50]  eta: 0:00:15  loss: 5.5270 (5.5311)  acc1: 3.6000 (4.3619)  acc5: 12.8000 (15.2000)  time: 0.1638  data: 0.0003  max mem: 27255
Test:  [30/50]  eta: 0:00:08  loss: 5.6223 (5.5783)  acc1: 4.0000 (4.4903)  acc5: 16.0000 (15.3806)  time: 0.1638  data: 0.0004  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 5.6565 (5.6094)  acc1: 4.8000 (4.5951)  acc5: 16.0000 (15.4341)  time: 0.2066  data: 0.0432  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 5.6531 (5.5730)  acc1: 4.8000 (5.0880)  acc5: 17.6000 (16.4560)  time: 0.2064  data: 0.0431  max mem: 27255
Test: Total time: 0:00:16 (0.3332 s / it)
* Acc@1 5.094 Acc@5 15.936 loss 5.586
Accuracy of the model on the 50000 test images: 5.1%
Max accuracy: 5.09%
Epoch: [1]  [   0/2502]  eta: 1:35:52  lr: 0.000200  min_lr: 0.000200  loss: 6.6224 (6.6224)  weight_decay: 0.0500 (0.0500)  time: 2.2990  data: 1.9266  max mem: 27255
Epoch: [1]  [ 200/2502]  eta: 0:13:12  lr: 0.000216  min_lr: 0.000216  loss: 6.4949 (6.4642)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.9360 (8.3103)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [1]  [ 400/2502]  eta: 0:11:54  lr: 0.000232  min_lr: 0.000232  loss: 6.3787 (6.4215)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.3406 (8.5920)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [1]  [ 600/2502]  eta: 0:10:43  lr: 0.000248  min_lr: 0.000248  loss: 6.4907 (6.4066)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.6257 (8.4427)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [1]  [ 800/2502]  eta: 0:09:35  lr: 0.000264  min_lr: 0.000264  loss: 6.2907 (6.3951)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.3046 (8.3046)  time: 0.3371  data: 0.0005  max mem: 27255
Epoch: [1]  [1000/2502]  eta: 0:08:26  lr: 0.000280  min_lr: 0.000280  loss: 6.2324 (6.3812)  weight_decay: 0.0500 (0.0500)  grad_norm: 7.3250 (8.2126)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [1]  [1200/2502]  eta: 0:07:18  lr: 0.000296  min_lr: 0.000296  loss: 6.3458 (6.3729)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.8463 (8.0469)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [1]  [1400/2502]  eta: 0:06:10  lr: 0.000312  min_lr: 0.000312  loss: 6.2604 (6.3622)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.3239 (7.9562)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [1]  [1600/2502]  eta: 0:05:03  lr: 0.000328  min_lr: 0.000328  loss: 6.2590 (6.3491)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.2225 (7.7739)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [1]  [1800/2502]  eta: 0:03:55  lr: 0.000344  min_lr: 0.000344  loss: 6.2539 (6.3358)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.3441 (7.6848)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [1]  [2000/2502]  eta: 0:02:48  lr: 0.000360  min_lr: 0.000360  loss: 6.2264 (6.3244)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.6787 (7.6009)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [1]  [2200/2502]  eta: 0:01:41  lr: 0.000376  min_lr: 0.000376  loss: 6.3483 (6.3171)  weight_decay: 0.0500 (0.0500)  grad_norm: 6.4194 (7.5241)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [1]  [2400/2502]  eta: 0:00:34  lr: 0.000392  min_lr: 0.000392  loss: 6.0750 (6.3040)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.7798 (7.3952)  time: 0.3362  data: 0.0005  max mem: 27255
Epoch: [1]  [2501/2502]  eta: 0:00:00  lr: 0.000400  min_lr: 0.000400  loss: 6.2740 (6.3004)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.2304 (7.3652)  time: 0.3002  data: 0.0008  max mem: 27255
Epoch: [1] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.000400  min_lr: 0.000400  loss: 6.2740 (6.3002)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.2304 (7.3652)
Test:  [ 0/50]  eta: 0:02:27  loss: 4.4220 (4.4220)  acc1: 18.4000 (18.4000)  acc5: 42.0000 (42.0000)  time: 2.9406  data: 2.7383  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 4.7489 (4.7203)  acc1: 10.8000 (11.2364)  acc5: 30.8000 (30.6909)  time: 0.4573  data: 0.2890  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 4.7118 (4.6541)  acc1: 10.8000 (11.8667)  acc5: 31.6000 (32.4381)  time: 0.2050  data: 0.0406  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 4.7794 (4.7584)  acc1: 11.6000 (11.9871)  acc5: 30.8000 (31.0839)  time: 0.2319  data: 0.0668  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 4.9402 (4.8126)  acc1: 12.0000 (11.7951)  acc5: 26.8000 (30.0488)  time: 0.2180  data: 0.0530  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 4.9125 (4.7750)  acc1: 12.8000 (12.4560)  acc5: 26.8000 (31.1360)  time: 0.1684  data: 0.0049  max mem: 27255
Test: Total time: 0:00:12 (0.2593 s / it)
* Acc@1 12.410 Acc@5 30.292 loss 4.782
Accuracy of the model on the 50000 test images: 12.4%
Max accuracy: 12.41%
Epoch: [2]  [   0/2502]  eta: 1:19:45  lr: 0.000400  min_lr: 0.000400  loss: 6.0557 (6.0557)  weight_decay: 0.0500 (0.0500)  time: 1.9127  data: 1.5592  max mem: 27255
Epoch: [2]  [ 200/2502]  eta: 0:13:07  lr: 0.000416  min_lr: 0.000416  loss: 6.1000 (6.1431)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.7241 (6.3866)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [2]  [ 400/2502]  eta: 0:11:53  lr: 0.000432  min_lr: 0.000432  loss: 6.2756 (6.1365)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.8180 (6.2620)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [2]  [ 600/2502]  eta: 0:10:42  lr: 0.000448  min_lr: 0.000448  loss: 6.1898 (6.1169)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.4799 (6.2182)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [2]  [ 800/2502]  eta: 0:09:33  lr: 0.000464  min_lr: 0.000464  loss: 6.0672 (6.1017)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.9550 (6.1974)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [2]  [1000/2502]  eta: 0:08:24  lr: 0.000480  min_lr: 0.000480  loss: 6.0983 (6.0981)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.3253 (6.1345)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [2]  [1200/2502]  eta: 0:07:17  lr: 0.000496  min_lr: 0.000496  loss: 6.1812 (6.0872)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.3759 (5.9472)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [2]  [1400/2502]  eta: 0:06:09  lr: 0.000512  min_lr: 0.000512  loss: 5.9427 (6.0837)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.4343 (5.7894)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [2]  [1600/2502]  eta: 0:05:02  lr: 0.000528  min_lr: 0.000528  loss: 6.1590 (6.0701)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.4540 (5.6819)  time: 0.3397  data: 0.0004  max mem: 27255
Epoch: [2]  [1800/2502]  eta: 0:03:55  lr: 0.000544  min_lr: 0.000544  loss: 6.1010 (6.0565)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.5835 (5.6191)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [2]  [2000/2502]  eta: 0:02:48  lr: 0.000560  min_lr: 0.000560  loss: 5.8134 (6.0390)  weight_decay: 0.0500 (0.0500)  grad_norm: 5.0999 (5.5549)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [2]  [2200/2502]  eta: 0:01:41  lr: 0.000576  min_lr: 0.000576  loss: 6.0472 (6.0285)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.5546 (5.4802)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [2]  [2400/2502]  eta: 0:00:34  lr: 0.000592  min_lr: 0.000592  loss: 5.8367 (6.0163)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.5441 (5.4518)  time: 0.3361  data: 0.0004  max mem: 27255
Epoch: [2]  [2501/2502]  eta: 0:00:00  lr: 0.000600  min_lr: 0.000600  loss: 5.7857 (6.0095)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.3017 (5.4133)  time: 0.3005  data: 0.0008  max mem: 27255
Epoch: [2] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000600  min_lr: 0.000600  loss: 5.7857 (6.0074)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.3017 (5.4133)
Test:  [ 0/50]  eta: 0:02:08  loss: 3.5832 (3.5832)  acc1: 28.4000 (28.4000)  acc5: 54.4000 (54.4000)  time: 2.5751  data: 2.3759  max mem: 27255
Test:  [10/50]  eta: 0:00:15  loss: 3.8345 (3.8104)  acc1: 22.8000 (22.2545)  acc5: 46.4000 (47.8909)  time: 0.3992  data: 0.2319  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 3.7658 (3.7463)  acc1: 21.6000 (23.9619)  acc5: 48.0000 (49.5619)  time: 0.1949  data: 0.0310  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 4.1157 (3.9471)  acc1: 20.8000 (22.4645)  acc5: 43.2000 (46.0645)  time: 0.2108  data: 0.0447  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 4.3843 (4.0573)  acc1: 18.8000 (21.4634)  acc5: 38.0000 (44.1268)  time: 0.1892  data: 0.0232  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 4.3572 (4.0387)  acc1: 18.8000 (21.8080)  acc5: 38.4000 (44.5520)  time: 0.1695  data: 0.0060  max mem: 27255
Test: Total time: 0:00:11 (0.2385 s / it)
* Acc@1 21.658 Acc@5 44.940 loss 4.040
Accuracy of the model on the 50000 test images: 21.7%
Max accuracy: 21.66%
Epoch: [3]  [   0/2502]  eta: 1:21:39  lr: 0.000600  min_lr: 0.000600  loss: 5.6683 (5.6683)  weight_decay: 0.0500 (0.0500)  time: 1.9584  data: 1.5998  max mem: 27255
Epoch: [3]  [ 200/2502]  eta: 0:13:08  lr: 0.000616  min_lr: 0.000616  loss: 5.9249 (5.8310)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.0769 (4.5107)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [3]  [ 400/2502]  eta: 0:11:52  lr: 0.000632  min_lr: 0.000632  loss: 5.9063 (5.8206)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.1459 (4.4678)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [3]  [ 600/2502]  eta: 0:10:41  lr: 0.000648  min_lr: 0.000648  loss: 5.8956 (5.8009)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.7266 (4.4400)  time: 0.3327  data: 0.0005  max mem: 27255
Epoch: [3]  [ 800/2502]  eta: 0:09:32  lr: 0.000664  min_lr: 0.000664  loss: 5.7357 (5.7934)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.3034 (4.4330)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [3]  [1000/2502]  eta: 0:08:24  lr: 0.000680  min_lr: 0.000680  loss: 5.7351 (5.7889)  weight_decay: 0.0500 (0.0500)  grad_norm: 4.1779 (4.4461)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [3]  [1200/2502]  eta: 0:07:17  lr: 0.000696  min_lr: 0.000696  loss: 5.7708 (5.7816)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.7639 (4.4251)  time: 0.3357  data: 0.0005  max mem: 27255
Epoch: [3]  [1400/2502]  eta: 0:06:09  lr: 0.000712  min_lr: 0.000712  loss: 5.7222 (5.7775)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.9626 (4.3992)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [3]  [1600/2502]  eta: 0:05:02  lr: 0.000728  min_lr: 0.000728  loss: 5.6563 (5.7657)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.6780 (4.3410)  time: 0.3353  data: 0.0005  max mem: 27255
Epoch: [3]  [1800/2502]  eta: 0:03:55  lr: 0.000744  min_lr: 0.000744  loss: 5.6181 (5.7550)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.8556 (4.3131)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [3]  [2000/2502]  eta: 0:02:48  lr: 0.000760  min_lr: 0.000760  loss: 5.7150 (5.7487)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.2536 (4.2700)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [3]  [2200/2502]  eta: 0:01:41  lr: 0.000776  min_lr: 0.000776  loss: 5.8013 (5.7363)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.4612 (4.2283)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [3]  [2400/2502]  eta: 0:00:34  lr: 0.000792  min_lr: 0.000792  loss: 5.7306 (5.7255)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.6981 (4.2116)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [3]  [2501/2502]  eta: 0:00:00  lr: 0.000800  min_lr: 0.000800  loss: 5.5695 (5.7204)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.1562 (4.1818)  time: 0.2999  data: 0.0008  max mem: 27255
Epoch: [3] Total time: 0:13:57 (0.3346 s / it)
Averaged stats: lr: 0.000800  min_lr: 0.000800  loss: 5.5695 (5.7207)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.1562 (4.1818)
Test:  [ 0/50]  eta: 0:02:51  loss: 2.9606 (2.9606)  acc1: 43.6000 (43.6000)  acc5: 71.6000 (71.6000)  time: 3.4320  data: 3.2326  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 3.3398 (3.1991)  acc1: 33.2000 (33.3818)  acc5: 59.2000 (61.7091)  time: 0.4804  data: 0.3056  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 3.2018 (3.1066)  acc1: 33.2000 (34.8190)  acc5: 60.4000 (63.4286)  time: 0.2103  data: 0.0380  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 3.3388 (3.3084)  acc1: 28.8000 (32.5290)  acc5: 55.2000 (59.2000)  time: 0.2298  data: 0.0617  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 3.8261 (3.4314)  acc1: 26.0000 (30.6829)  acc5: 48.8000 (56.5561)  time: 0.1973  data: 0.0333  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 3.7531 (3.4219)  acc1: 26.0000 (31.0240)  acc5: 50.0000 (56.6400)  time: 0.1970  data: 0.0332  max mem: 27255
Test: Total time: 0:00:13 (0.2634 s / it)
* Acc@1 30.942 Acc@5 56.526 loss 3.419
Accuracy of the model on the 50000 test images: 30.9%
Max accuracy: 30.94%
Epoch: [4]  [   0/2502]  eta: 1:20:14  lr: 0.000800  min_lr: 0.000800  loss: 6.0228 (6.0228)  weight_decay: 0.0500 (0.0500)  time: 1.9242  data: 1.5718  max mem: 27255
Epoch: [4]  [ 200/2502]  eta: 0:13:06  lr: 0.000816  min_lr: 0.000816  loss: 5.7114 (5.6478)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.0190 (3.5618)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [4]  [ 400/2502]  eta: 0:11:49  lr: 0.000832  min_lr: 0.000832  loss: 5.6817 (5.6112)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.5549 (3.5825)  time: 0.3324  data: 0.0004  max mem: 27255
Epoch: [4]  [ 600/2502]  eta: 0:10:41  lr: 0.000848  min_lr: 0.000848  loss: 5.4432 (5.5732)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.5427 (3.5731)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [4]  [ 800/2502]  eta: 0:09:32  lr: 0.000864  min_lr: 0.000864  loss: 5.6542 (5.5631)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.1930 (3.5402)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [4]  [1000/2502]  eta: 0:08:24  lr: 0.000880  min_lr: 0.000880  loss: 5.5989 (5.5524)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.2344 (3.5441)  time: 0.3363  data: 0.0004  max mem: 27255
Epoch: [4]  [1200/2502]  eta: 0:07:17  lr: 0.000896  min_lr: 0.000896  loss: 5.7140 (5.5483)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.3784 (3.5633)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [4]  [1400/2502]  eta: 0:06:10  lr: 0.000912  min_lr: 0.000912  loss: 5.5901 (5.5418)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.2726 (3.5252)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [4]  [1600/2502]  eta: 0:05:03  lr: 0.000928  min_lr: 0.000928  loss: 5.5233 (5.5381)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.1071 (3.4804)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [4]  [1800/2502]  eta: 0:03:55  lr: 0.000944  min_lr: 0.000944  loss: 5.4422 (5.5263)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.6830 (3.4444)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [4]  [2000/2502]  eta: 0:02:48  lr: 0.000960  min_lr: 0.000960  loss: 5.4240 (5.5162)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.9905 (3.4095)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [4]  [2200/2502]  eta: 0:01:41  lr: 0.000976  min_lr: 0.000976  loss: 5.1705 (5.5020)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.9309 (3.3987)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [4]  [2400/2502]  eta: 0:00:34  lr: 0.000992  min_lr: 0.000992  loss: 5.3427 (5.4916)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.8405 (3.3665)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [4]  [2501/2502]  eta: 0:00:00  lr: 0.001000  min_lr: 0.001000  loss: 5.4111 (5.4845)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.4977 (3.3351)  time: 0.3003  data: 0.0008  max mem: 27255
Epoch: [4] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.001000  min_lr: 0.001000  loss: 5.4111 (5.4923)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.4977 (3.3351)
Test:  [ 0/50]  eta: 0:02:17  loss: 2.2594 (2.2594)  acc1: 50.8000 (50.8000)  acc5: 78.0000 (78.0000)  time: 2.7443  data: 2.5412  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 2.7412 (2.7250)  acc1: 41.6000 (42.5818)  acc5: 70.4000 (70.1818)  time: 0.4375  data: 0.2695  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 2.7295 (2.6863)  acc1: 40.8000 (42.9905)  acc5: 71.2000 (71.2381)  time: 0.2482  data: 0.0841  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 3.0277 (2.9403)  acc1: 36.8000 (40.1677)  acc5: 60.4000 (66.3742)  time: 0.2861  data: 0.1203  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 3.4939 (3.0749)  acc1: 31.6000 (38.0488)  acc5: 55.2000 (64.0390)  time: 0.2273  data: 0.0609  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 3.4018 (3.0796)  acc1: 32.4000 (37.9200)  acc5: 57.6000 (63.8960)  time: 0.1681  data: 0.0035  max mem: 27255
Test: Total time: 0:00:13 (0.2766 s / it)
* Acc@1 37.966 Acc@5 63.852 loss 3.075
Accuracy of the model on the 50000 test images: 38.0%
Max accuracy: 37.97%
Epoch: [5]  [   0/2502]  eta: 1:26:12  lr: 0.001000  min_lr: 0.001000  loss: 5.9140 (5.9140)  weight_decay: 0.0500 (0.0500)  time: 2.0674  data: 1.7283  max mem: 27255
Epoch: [5]  [ 200/2502]  eta: 0:13:11  lr: 0.001016  min_lr: 0.001016  loss: 5.6214 (5.4410)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.8651 (2.9347)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [5]  [ 400/2502]  eta: 0:11:52  lr: 0.001032  min_lr: 0.001032  loss: 5.6489 (5.4076)  weight_decay: 0.0500 (0.0500)  grad_norm: 3.0106 (2.9437)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [5]  [ 600/2502]  eta: 0:10:42  lr: 0.001048  min_lr: 0.001048  loss: 5.6578 (5.3957)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.8542 (2.9139)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [5]  [ 800/2502]  eta: 0:09:32  lr: 0.001064  min_lr: 0.001064  loss: 5.6531 (5.3854)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.6365 (2.8884)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [5]  [1000/2502]  eta: 0:08:24  lr: 0.001080  min_lr: 0.001080  loss: 5.4826 (5.3715)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.4796 (2.8432)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [5]  [1200/2502]  eta: 0:07:16  lr: 0.001096  min_lr: 0.001096  loss: 5.1915 (5.3448)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.7131 (2.8609)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [5]  [1400/2502]  eta: 0:06:09  lr: 0.001112  min_lr: 0.001112  loss: 5.5080 (5.3373)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.5022 (2.8316)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [5]  [1600/2502]  eta: 0:05:02  lr: 0.001128  min_lr: 0.001128  loss: 5.5114 (5.3339)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.5460 (2.8241)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [5]  [1800/2502]  eta: 0:03:55  lr: 0.001144  min_lr: 0.001144  loss: 5.3803 (5.3256)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.5399 (2.7928)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [5]  [2000/2502]  eta: 0:02:48  lr: 0.001160  min_lr: 0.001160  loss: 5.5617 (5.3295)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.2601 (2.7629)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [5]  [2200/2502]  eta: 0:01:41  lr: 0.001176  min_lr: 0.001176  loss: 4.9681 (5.3201)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.6265 (2.7610)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [5]  [2400/2502]  eta: 0:00:34  lr: 0.001192  min_lr: 0.001192  loss: 5.5012 (5.3110)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.3994 (2.7333)  time: 0.3325  data: 0.0004  max mem: 27255
Epoch: [5]  [2501/2502]  eta: 0:00:00  lr: 0.001200  min_lr: 0.001200  loss: 5.4641 (5.3075)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.5697 (2.7313)  time: 0.2999  data: 0.0007  max mem: 27255
Epoch: [5] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.001200  min_lr: 0.001200  loss: 5.4641 (5.2943)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.5697 (2.7313)
Test:  [ 0/50]  eta: 0:02:54  loss: 1.9337 (1.9337)  acc1: 61.6000 (61.6000)  acc5: 83.2000 (83.2000)  time: 3.4918  data: 3.2891  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 2.3505 (2.3579)  acc1: 49.2000 (49.8182)  acc5: 77.2000 (77.3091)  time: 0.4860  data: 0.3180  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 2.3415 (2.3356)  acc1: 47.6000 (50.0000)  acc5: 77.6000 (77.7524)  time: 0.1853  data: 0.0211  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 2.5177 (2.5913)  acc1: 44.8000 (46.4645)  acc5: 69.6000 (73.0968)  time: 0.2075  data: 0.0437  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 3.1797 (2.7540)  acc1: 36.0000 (43.6878)  acc5: 62.0000 (69.9122)  time: 0.1967  data: 0.0331  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 3.1723 (2.7664)  acc1: 36.8000 (43.6320)  acc5: 62.0000 (69.6800)  time: 0.1968  data: 0.0329  max mem: 27255
Test: Total time: 0:00:12 (0.2542 s / it)
* Acc@1 43.696 Acc@5 69.644 loss 2.763
Accuracy of the model on the 50000 test images: 43.7%
Max accuracy: 43.70%
Epoch: [6]  [   0/2502]  eta: 1:20:13  lr: 0.001200  min_lr: 0.001200  loss: 5.8058 (5.8058)  weight_decay: 0.0500 (0.0500)  time: 1.9237  data: 1.5785  max mem: 27255
Epoch: [6]  [ 200/2502]  eta: 0:13:07  lr: 0.001216  min_lr: 0.001216  loss: 5.3826 (5.2078)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.3232 (2.3134)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [6]  [ 400/2502]  eta: 0:11:50  lr: 0.001232  min_lr: 0.001232  loss: 4.8896 (5.2054)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.3265 (2.3946)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [6]  [ 600/2502]  eta: 0:10:40  lr: 0.001248  min_lr: 0.001248  loss: 5.1552 (5.1970)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9124 (2.3552)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [6]  [ 800/2502]  eta: 0:09:31  lr: 0.001264  min_lr: 0.001264  loss: 5.2545 (5.1905)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9199 (2.2939)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [6]  [1000/2502]  eta: 0:08:23  lr: 0.001280  min_lr: 0.001280  loss: 5.0844 (5.1821)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.0205 (2.2722)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [6]  [1200/2502]  eta: 0:07:16  lr: 0.001296  min_lr: 0.001296  loss: 5.2575 (5.1744)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.1425 (2.2713)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [6]  [1400/2502]  eta: 0:06:09  lr: 0.001312  min_lr: 0.001312  loss: 5.1222 (5.1612)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.2051 (2.2649)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [6]  [1600/2502]  eta: 0:05:02  lr: 0.001328  min_lr: 0.001328  loss: 4.8423 (5.1546)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7671 (2.2388)  time: 0.3418  data: 0.0004  max mem: 27255
Epoch: [6]  [1800/2502]  eta: 0:03:55  lr: 0.001344  min_lr: 0.001344  loss: 4.5883 (5.1524)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.2314 (2.2408)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [6]  [2000/2502]  eta: 0:02:48  lr: 0.001360  min_lr: 0.001360  loss: 5.3804 (5.1474)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9755 (2.2386)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [6]  [2200/2502]  eta: 0:01:41  lr: 0.001376  min_lr: 0.001376  loss: 5.0269 (5.1391)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.1350 (2.2318)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [6]  [2400/2502]  eta: 0:00:34  lr: 0.001392  min_lr: 0.001392  loss: 5.1390 (5.1335)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8773 (2.2166)  time: 0.3419  data: 0.0004  max mem: 27255
Epoch: [6]  [2501/2502]  eta: 0:00:00  lr: 0.001400  min_lr: 0.001400  loss: 5.2405 (5.1305)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.0077 (2.2135)  time: 0.3007  data: 0.0007  max mem: 27255
Epoch: [6] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.001400  min_lr: 0.001400  loss: 5.2405 (5.1402)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.0077 (2.2135)
Test:  [ 0/50]  eta: 0:02:49  loss: 1.6983 (1.6983)  acc1: 67.2000 (67.2000)  acc5: 88.8000 (88.8000)  time: 3.3967  data: 3.1947  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 2.1797 (2.1681)  acc1: 51.6000 (54.1091)  acc5: 81.2000 (81.2364)  time: 0.4722  data: 0.3042  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 2.1166 (2.1069)  acc1: 53.6000 (55.1238)  acc5: 81.2000 (81.6762)  time: 0.1895  data: 0.0253  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 2.2924 (2.3503)  acc1: 47.2000 (51.6258)  acc5: 70.0000 (76.9677)  time: 0.1973  data: 0.0334  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.9108 (2.4999)  acc1: 39.6000 (48.7805)  acc5: 66.0000 (74.2146)  time: 0.2335  data: 0.0697  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.9099 (2.5215)  acc1: 39.6000 (48.2880)  acc5: 66.0000 (73.8240)  time: 0.2177  data: 0.0541  max mem: 27255
Test: Total time: 0:00:13 (0.2685 s / it)
* Acc@1 48.052 Acc@5 73.784 loss 2.515
Accuracy of the model on the 50000 test images: 48.1%
Max accuracy: 48.05%
Epoch: [7]  [   0/2502]  eta: 1:27:32  lr: 0.001400  min_lr: 0.001400  loss: 4.2005 (4.2005)  weight_decay: 0.0500 (0.0500)  time: 2.0993  data: 1.7492  max mem: 27255
Epoch: [7]  [ 200/2502]  eta: 0:13:13  lr: 0.001416  min_lr: 0.001416  loss: 4.5190 (5.0364)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7997 (1.9302)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [7]  [ 400/2502]  eta: 0:11:54  lr: 0.001432  min_lr: 0.001432  loss: 5.2422 (5.0328)  weight_decay: 0.0500 (0.0500)  grad_norm: 2.0550 (2.0103)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [7]  [ 600/2502]  eta: 0:10:42  lr: 0.001448  min_lr: 0.001448  loss: 5.3695 (5.0454)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9079 (1.9746)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [7]  [ 800/2502]  eta: 0:09:33  lr: 0.001464  min_lr: 0.001464  loss: 4.9812 (5.0602)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8184 (1.9532)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [7]  [1000/2502]  eta: 0:08:25  lr: 0.001480  min_lr: 0.001480  loss: 5.3610 (5.0522)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7430 (1.9367)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [7]  [1200/2502]  eta: 0:07:17  lr: 0.001496  min_lr: 0.001496  loss: 5.2973 (5.0376)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8447 (1.9468)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [7]  [1400/2502]  eta: 0:06:10  lr: 0.001512  min_lr: 0.001512  loss: 5.1309 (5.0297)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7009 (1.9296)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [7]  [1600/2502]  eta: 0:05:02  lr: 0.001528  min_lr: 0.001528  loss: 5.1242 (5.0215)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7148 (1.9244)  time: 0.3362  data: 0.0004  max mem: 27255
Epoch: [7]  [1800/2502]  eta: 0:03:55  lr: 0.001544  min_lr: 0.001544  loss: 4.8963 (5.0187)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5525 (1.9022)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [7]  [2000/2502]  eta: 0:02:48  lr: 0.001560  min_lr: 0.001560  loss: 4.9691 (5.0173)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5314 (1.8870)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [7]  [2200/2502]  eta: 0:01:41  lr: 0.001576  min_lr: 0.001576  loss: 5.3133 (5.0134)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5341 (1.8593)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [7]  [2400/2502]  eta: 0:00:34  lr: 0.001592  min_lr: 0.001592  loss: 5.2932 (5.0067)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4589 (1.8413)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [7]  [2501/2502]  eta: 0:00:00  lr: 0.001600  min_lr: 0.001600  loss: 4.8291 (5.0042)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6357 (1.8418)  time: 0.2997  data: 0.0007  max mem: 27255
Epoch: [7] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.001600  min_lr: 0.001600  loss: 4.8291 (5.0019)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6357 (1.8418)
Test:  [ 0/50]  eta: 0:03:01  loss: 1.8586 (1.8586)  acc1: 64.0000 (64.0000)  acc5: 82.4000 (82.4000)  time: 3.6253  data: 3.4364  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 2.0218 (2.0542)  acc1: 58.8000 (58.0364)  acc5: 82.0000 (82.1818)  time: 0.4819  data: 0.3156  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 2.0005 (2.0133)  acc1: 57.6000 (57.8667)  acc5: 82.4000 (83.1048)  time: 0.1872  data: 0.0232  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 2.3030 (2.2479)  acc1: 48.8000 (54.0000)  acc5: 74.8000 (78.8903)  time: 0.2238  data: 0.0598  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.8177 (2.3852)  acc1: 43.6000 (51.3561)  acc5: 67.2000 (76.1756)  time: 0.2061  data: 0.0424  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.7773 (2.4008)  acc1: 44.0000 (51.0320)  acc5: 68.8000 (75.6640)  time: 0.2058  data: 0.0422  max mem: 27255
Test: Total time: 0:00:13 (0.2614 s / it)
* Acc@1 50.812 Acc@5 75.602 loss 2.401
Accuracy of the model on the 50000 test images: 50.8%
Max accuracy: 50.81%
Epoch: [8]  [   0/2502]  eta: 1:29:20  lr: 0.001600  min_lr: 0.001600  loss: 5.5937 (5.5937)  weight_decay: 0.0500 (0.0500)  time: 2.1423  data: 1.8044  max mem: 27255
Epoch: [8]  [ 200/2502]  eta: 0:13:11  lr: 0.001616  min_lr: 0.001616  loss: 4.9456 (4.9461)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4365 (1.5502)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [8]  [ 400/2502]  eta: 0:11:52  lr: 0.001632  min_lr: 0.001632  loss: 4.8229 (4.9316)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5528 (1.5744)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [8]  [ 600/2502]  eta: 0:10:41  lr: 0.001648  min_lr: 0.001648  loss: 5.2274 (4.9495)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7168 (1.5648)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [8]  [ 800/2502]  eta: 0:09:32  lr: 0.001664  min_lr: 0.001664  loss: 5.1872 (4.9466)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4151 (1.5395)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [8]  [1000/2502]  eta: 0:08:24  lr: 0.001680  min_lr: 0.001680  loss: 4.9870 (4.9372)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3408 (1.5336)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [8]  [1200/2502]  eta: 0:07:17  lr: 0.001696  min_lr: 0.001696  loss: 5.3032 (4.9249)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4464 (1.5314)  time: 0.3401  data: 0.0007  max mem: 27255
Epoch: [8]  [1400/2502]  eta: 0:06:10  lr: 0.001712  min_lr: 0.001712  loss: 4.8926 (4.9216)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3680 (1.5282)  time: 0.3424  data: 0.0004  max mem: 27255
Epoch: [8]  [1600/2502]  eta: 0:05:02  lr: 0.001728  min_lr: 0.001728  loss: 4.7364 (4.9038)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3239 (1.5120)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [8]  [1800/2502]  eta: 0:03:55  lr: 0.001744  min_lr: 0.001744  loss: 4.9741 (4.9037)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4132 (1.5025)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [8]  [2000/2502]  eta: 0:02:48  lr: 0.001760  min_lr: 0.001760  loss: 5.0734 (4.9051)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5264 (1.4936)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [8]  [2200/2502]  eta: 0:01:41  lr: 0.001776  min_lr: 0.001776  loss: 5.0096 (4.9001)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2336 (1.4819)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [8]  [2400/2502]  eta: 0:00:34  lr: 0.001792  min_lr: 0.001792  loss: 5.0009 (4.9053)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3725 (1.4765)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [8]  [2501/2502]  eta: 0:00:00  lr: 0.001800  min_lr: 0.001800  loss: 4.6690 (4.9054)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2733 (1.4725)  time: 0.2999  data: 0.0008  max mem: 27255
Epoch: [8] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.001800  min_lr: 0.001800  loss: 4.6690 (4.9060)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2733 (1.4725)
Test:  [ 0/50]  eta: 0:02:27  loss: 1.5142 (1.5142)  acc1: 68.8000 (68.8000)  acc5: 89.2000 (89.2000)  time: 2.9586  data: 2.7640  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.9645 (1.8831)  acc1: 60.4000 (60.3636)  acc5: 84.0000 (84.8727)  time: 0.4270  data: 0.2602  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.8745 (1.8494)  acc1: 60.0000 (60.4381)  acc5: 85.2000 (85.3333)  time: 0.1909  data: 0.0251  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 2.0926 (2.0670)  acc1: 53.6000 (57.0194)  acc5: 76.8000 (81.2774)  time: 0.2446  data: 0.0788  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.6046 (2.2156)  acc1: 46.4000 (54.1463)  acc5: 70.4000 (78.5073)  time: 0.2384  data: 0.0746  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.5781 (2.2306)  acc1: 46.4000 (53.8080)  acc5: 71.2000 (78.2400)  time: 0.1797  data: 0.0161  max mem: 27255
Test: Total time: 0:00:13 (0.2623 s / it)
* Acc@1 53.384 Acc@5 78.136 loss 2.234
Accuracy of the model on the 50000 test images: 53.4%
Max accuracy: 53.38%
Epoch: [9]  [   0/2502]  eta: 1:15:13  lr: 0.001800  min_lr: 0.001800  loss: 5.4569 (5.4569)  weight_decay: 0.0500 (0.0500)  time: 1.8039  data: 1.4543  max mem: 27255
Epoch: [9]  [ 200/2502]  eta: 0:13:05  lr: 0.001816  min_lr: 0.001816  loss: 4.4253 (4.8163)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3147 (1.3798)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [9]  [ 400/2502]  eta: 0:11:50  lr: 0.001832  min_lr: 0.001832  loss: 4.8715 (4.8162)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1695 (1.3766)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [9]  [ 600/2502]  eta: 0:10:40  lr: 0.001848  min_lr: 0.001848  loss: 5.0785 (4.8168)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3851 (1.3717)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [9]  [ 800/2502]  eta: 0:09:32  lr: 0.001864  min_lr: 0.001864  loss: 4.0720 (4.8142)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1717 (1.3467)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [9]  [1000/2502]  eta: 0:08:24  lr: 0.001880  min_lr: 0.001880  loss: 4.7538 (4.8120)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1674 (1.3290)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [9]  [1200/2502]  eta: 0:07:16  lr: 0.001896  min_lr: 0.001896  loss: 4.8380 (4.8220)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1027 (1.3125)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [9]  [1400/2502]  eta: 0:06:09  lr: 0.001912  min_lr: 0.001912  loss: 4.8964 (4.8184)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2873 (1.3014)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [9]  [1600/2502]  eta: 0:05:02  lr: 0.001928  min_lr: 0.001928  loss: 4.9936 (4.8225)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1736 (1.2908)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [9]  [1800/2502]  eta: 0:03:55  lr: 0.001944  min_lr: 0.001944  loss: 4.8444 (4.8221)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1299 (1.2827)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [9]  [2000/2502]  eta: 0:02:48  lr: 0.001960  min_lr: 0.001960  loss: 4.8815 (4.8149)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2166 (1.2698)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [9]  [2200/2502]  eta: 0:01:41  lr: 0.001976  min_lr: 0.001976  loss: 4.8257 (4.8168)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1012 (1.2588)  time: 0.3373  data: 0.0004  max mem: 27255
Epoch: [9]  [2400/2502]  eta: 0:00:34  lr: 0.001992  min_lr: 0.001992  loss: 5.1524 (4.8151)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1126 (1.2472)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [9]  [2501/2502]  eta: 0:00:00  lr: 0.002000  min_lr: 0.002000  loss: 4.9148 (4.8161)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1264 (1.2427)  time: 0.3000  data: 0.0009  max mem: 27255
Epoch: [9] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.002000  min_lr: 0.002000  loss: 4.9148 (4.8098)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1264 (1.2427)
Test:  [ 0/50]  eta: 0:02:46  loss: 1.5948 (1.5948)  acc1: 68.4000 (68.4000)  acc5: 87.6000 (87.6000)  time: 3.3227  data: 3.1279  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.8861 (1.8990)  acc1: 62.4000 (60.0364)  acc5: 86.4000 (85.2364)  time: 0.4580  data: 0.2904  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.7850 (1.8330)  acc1: 62.0000 (60.8762)  acc5: 87.2000 (86.3048)  time: 0.1757  data: 0.0112  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 2.0652 (2.0510)  acc1: 53.6000 (57.6516)  acc5: 79.2000 (82.6065)  time: 0.2086  data: 0.0443  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.5525 (2.1801)  acc1: 48.0000 (55.2585)  acc5: 73.6000 (80.1756)  time: 0.2104  data: 0.0424  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.5185 (2.2016)  acc1: 48.8000 (54.9520)  acc5: 74.0000 (79.7600)  time: 0.2101  data: 0.0423  max mem: 27255
Test: Total time: 0:00:12 (0.2520 s / it)
* Acc@1 55.024 Acc@5 79.544 loss 2.197
Accuracy of the model on the 50000 test images: 55.0%
Max accuracy: 55.02%
Epoch: [10]  [   0/2502]  eta: 1:24:06  lr: 0.002000  min_lr: 0.002000  loss: 4.9792 (4.9792)  weight_decay: 0.0500 (0.0500)  time: 2.0170  data: 1.6664  max mem: 27255
Epoch: [10]  [ 200/2502]  eta: 0:13:09  lr: 0.002016  min_lr: 0.002016  loss: 4.8844 (4.7099)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0942 (1.0836)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [10]  [ 400/2502]  eta: 0:11:54  lr: 0.002032  min_lr: 0.002032  loss: 4.7727 (4.7560)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0443 (1.1139)  time: 0.3359  data: 0.0004  max mem: 27255
Epoch: [10]  [ 600/2502]  eta: 0:10:43  lr: 0.002048  min_lr: 0.002048  loss: 4.8754 (4.7295)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1560 (inf)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [10]  [ 800/2502]  eta: 0:09:33  lr: 0.002064  min_lr: 0.002064  loss: 4.5571 (4.7287)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0928 (inf)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [10]  [1000/2502]  eta: 0:08:25  lr: 0.002080  min_lr: 0.002080  loss: 4.3662 (4.7140)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0867 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [10]  [1200/2502]  eta: 0:07:17  lr: 0.002096  min_lr: 0.002096  loss: 5.0934 (4.7123)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9701 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [10]  [1400/2502]  eta: 0:06:09  lr: 0.002112  min_lr: 0.002112  loss: 4.8760 (4.7237)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9741 (inf)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [10]  [1600/2502]  eta: 0:05:02  lr: 0.002128  min_lr: 0.002128  loss: 4.9562 (4.7261)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9232 (inf)  time: 0.3386  data: 0.0004  max mem: 27255
Epoch: [10]  [1800/2502]  eta: 0:03:55  lr: 0.002144  min_lr: 0.002144  loss: 4.7684 (4.7273)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9379 (inf)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [10]  [2000/2502]  eta: 0:02:48  lr: 0.002160  min_lr: 0.002160  loss: 4.8504 (4.7321)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8576 (inf)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [10]  [2200/2502]  eta: 0:01:41  lr: 0.002176  min_lr: 0.002176  loss: 4.9526 (4.7344)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9221 (inf)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [10]  [2400/2502]  eta: 0:00:34  lr: 0.002192  min_lr: 0.002192  loss: 4.9292 (4.7275)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9445 (inf)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [10]  [2501/2502]  eta: 0:00:00  lr: 0.002200  min_lr: 0.002200  loss: 4.8679 (4.7245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8874 (inf)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [10] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.002200  min_lr: 0.002200  loss: 4.8679 (4.7266)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8874 (inf)
Test:  [ 0/50]  eta: 0:02:34  loss: 1.5215 (1.5215)  acc1: 68.8000 (68.8000)  acc5: 90.0000 (90.0000)  time: 3.0914  data: 2.8874  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 1.6900 (1.8028)  acc1: 68.8000 (62.9455)  acc5: 85.2000 (85.8909)  time: 0.5242  data: 0.3569  max mem: 27255
Test:  [20/50]  eta: 0:00:12  loss: 1.6867 (1.7487)  acc1: 64.0000 (63.4286)  acc5: 86.4000 (86.9714)  time: 0.2669  data: 0.1032  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.8386 (1.9621)  acc1: 58.0000 (59.5871)  acc5: 80.4000 (83.1742)  time: 0.2350  data: 0.0712  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.4635 (2.0865)  acc1: 50.8000 (56.9854)  acc5: 73.2000 (80.9463)  time: 0.1935  data: 0.0296  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.4635 (2.1101)  acc1: 50.8000 (56.3440)  acc5: 73.2000 (80.3440)  time: 0.1735  data: 0.0098  max mem: 27255
Test: Total time: 0:00:13 (0.2773 s / it)
* Acc@1 55.944 Acc@5 80.328 loss 2.113
Accuracy of the model on the 50000 test images: 55.9%
Max accuracy: 55.94%
Epoch: [11]  [   0/2502]  eta: 1:21:45  lr: 0.002200  min_lr: 0.002200  loss: 5.0368 (5.0368)  weight_decay: 0.0500 (0.0500)  time: 1.9607  data: 1.6115  max mem: 27255
Epoch: [11]  [ 200/2502]  eta: 0:13:07  lr: 0.002216  min_lr: 0.002216  loss: 4.8925 (4.6331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9648 (0.9819)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [11]  [ 400/2502]  eta: 0:11:51  lr: 0.002232  min_lr: 0.002232  loss: 4.4841 (4.6608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8871 (0.9773)  time: 0.3404  data: 0.0004  max mem: 27255
Epoch: [11]  [ 600/2502]  eta: 0:10:41  lr: 0.002248  min_lr: 0.002248  loss: 4.7863 (4.6493)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9219 (0.9708)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [11]  [ 800/2502]  eta: 0:09:32  lr: 0.002264  min_lr: 0.002264  loss: 4.4273 (4.6500)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8148 (0.9618)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [11]  [1000/2502]  eta: 0:08:25  lr: 0.002280  min_lr: 0.002280  loss: 4.6863 (4.6426)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8611 (0.9514)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [11]  [1200/2502]  eta: 0:07:17  lr: 0.002296  min_lr: 0.002296  loss: 4.8182 (4.6564)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7969 (0.9497)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [11]  [1400/2502]  eta: 0:06:09  lr: 0.002312  min_lr: 0.002312  loss: 4.4379 (4.6545)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7916 (0.9374)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [11]  [1600/2502]  eta: 0:05:02  lr: 0.002328  min_lr: 0.002328  loss: 4.4858 (4.6486)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8835 (0.9340)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [11]  [1800/2502]  eta: 0:03:55  lr: 0.002344  min_lr: 0.002344  loss: 5.0185 (4.6483)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8521 (0.9326)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [11]  [2000/2502]  eta: 0:02:48  lr: 0.002360  min_lr: 0.002360  loss: 5.0411 (4.6507)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7866 (0.9247)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [11]  [2200/2502]  eta: 0:01:41  lr: 0.002376  min_lr: 0.002376  loss: 4.9546 (4.6481)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9062 (0.9258)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [11]  [2400/2502]  eta: 0:00:34  lr: 0.002392  min_lr: 0.002392  loss: 4.4101 (4.6457)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8511 (0.9204)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [11]  [2501/2502]  eta: 0:00:00  lr: 0.002400  min_lr: 0.002400  loss: 4.6823 (4.6456)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8353 (0.9167)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [11] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.002400  min_lr: 0.002400  loss: 4.6823 (4.6468)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8353 (0.9167)
Test:  [ 0/50]  eta: 0:02:42  loss: 1.6618 (1.6618)  acc1: 72.0000 (72.0000)  acc5: 89.2000 (89.2000)  time: 3.2453  data: 3.0446  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.7903 (1.8231)  acc1: 66.8000 (64.3636)  acc5: 87.6000 (86.9818)  time: 0.4479  data: 0.2806  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.7210 (1.7399)  acc1: 63.6000 (64.7810)  acc5: 88.4000 (88.3048)  time: 0.1963  data: 0.0324  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.8941 (1.9153)  acc1: 58.0000 (61.7161)  acc5: 83.2000 (85.0065)  time: 0.2506  data: 0.0869  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.3518 (2.0363)  acc1: 52.0000 (59.0146)  acc5: 75.6000 (82.4976)  time: 0.2273  data: 0.0638  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.3379 (2.0616)  acc1: 51.2000 (58.4320)  acc5: 76.4000 (82.0480)  time: 0.1707  data: 0.0072  max mem: 27255
Test: Total time: 0:00:13 (0.2658 s / it)
* Acc@1 57.986 Acc@5 81.704 loss 2.069
Accuracy of the model on the 50000 test images: 58.0%
Max accuracy: 57.99%
Epoch: [12]  [   0/2502]  eta: 1:24:30  lr: 0.002400  min_lr: 0.002400  loss: 5.2033 (5.2033)  weight_decay: 0.0500 (0.0500)  time: 2.0266  data: 1.6675  max mem: 27255
Epoch: [12]  [ 200/2502]  eta: 0:13:08  lr: 0.002416  min_lr: 0.002416  loss: 4.5973 (4.6494)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8827 (0.8902)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [12]  [ 400/2502]  eta: 0:11:52  lr: 0.002432  min_lr: 0.002432  loss: 4.2061 (4.6227)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8514 (0.8668)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [12]  [ 600/2502]  eta: 0:10:42  lr: 0.002448  min_lr: 0.002448  loss: 4.7912 (4.6026)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8897 (0.8613)  time: 0.3418  data: 0.0005  max mem: 27255
Epoch: [12]  [ 800/2502]  eta: 0:09:33  lr: 0.002464  min_lr: 0.002464  loss: 4.7233 (4.6103)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7900 (0.8451)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [12]  [1000/2502]  eta: 0:08:24  lr: 0.002480  min_lr: 0.002480  loss: 4.6917 (4.6192)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7972 (0.8503)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [12]  [1200/2502]  eta: 0:07:17  lr: 0.002496  min_lr: 0.002496  loss: 4.3673 (4.6297)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8180 (0.8589)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [12]  [1400/2502]  eta: 0:06:09  lr: 0.002512  min_lr: 0.002512  loss: 4.9032 (4.6260)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8194 (0.8576)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [12]  [1600/2502]  eta: 0:05:02  lr: 0.002528  min_lr: 0.002528  loss: 4.6769 (4.6252)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8005 (0.8531)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [12]  [1800/2502]  eta: 0:03:55  lr: 0.002544  min_lr: 0.002544  loss: 4.8727 (4.6273)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7608 (0.8498)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [12]  [2000/2502]  eta: 0:02:48  lr: 0.002560  min_lr: 0.002560  loss: 4.2783 (4.6194)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7949 (0.8461)  time: 0.3387  data: 0.0005  max mem: 27255
Epoch: [12]  [2200/2502]  eta: 0:01:41  lr: 0.002576  min_lr: 0.002576  loss: 4.7002 (4.6198)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7837 (0.8415)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [12]  [2400/2502]  eta: 0:00:34  lr: 0.002592  min_lr: 0.002592  loss: 4.8289 (4.6210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7843 (0.8377)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [12]  [2501/2502]  eta: 0:00:00  lr: 0.002600  min_lr: 0.002600  loss: 4.4760 (4.6177)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7609 (0.8350)  time: 0.3001  data: 0.0009  max mem: 27255
Epoch: [12] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.002600  min_lr: 0.002600  loss: 4.4760 (4.6016)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7609 (0.8350)
Test:  [ 0/50]  eta: 0:02:13  loss: 1.3233 (1.3233)  acc1: 76.4000 (76.4000)  acc5: 90.8000 (90.8000)  time: 2.6768  data: 2.4812  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.4571 (1.5248)  acc1: 71.6000 (67.5636)  acc5: 89.6000 (89.2727)  time: 0.4567  data: 0.2901  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.5399 (1.5186)  acc1: 65.6000 (67.1619)  acc5: 89.6000 (89.6762)  time: 0.2424  data: 0.0787  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.6880 (1.7400)  acc1: 60.8000 (63.0839)  acc5: 82.0000 (86.0258)  time: 0.2569  data: 0.0897  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 2.3165 (1.8825)  acc1: 52.0000 (60.5171)  acc5: 76.4000 (83.6000)  time: 0.2446  data: 0.0761  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.3165 (1.9216)  acc1: 52.4000 (59.5840)  acc5: 76.8000 (83.0080)  time: 0.1950  data: 0.0297  max mem: 27255
Test: Total time: 0:00:14 (0.2802 s / it)
* Acc@1 59.588 Acc@5 82.970 loss 1.930
Accuracy of the model on the 50000 test images: 59.6%
Max accuracy: 59.59%
Epoch: [13]  [   0/2502]  eta: 1:18:48  lr: 0.002600  min_lr: 0.002600  loss: 4.9581 (4.9581)  weight_decay: 0.0500 (0.0500)  time: 1.8901  data: 1.5361  max mem: 27255
Epoch: [13]  [ 200/2502]  eta: 0:13:09  lr: 0.002616  min_lr: 0.002616  loss: 4.2939 (4.5091)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7665 (0.8002)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [13]  [ 400/2502]  eta: 0:11:53  lr: 0.002632  min_lr: 0.002632  loss: 4.7453 (4.5527)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7402 (0.8167)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [13]  [ 600/2502]  eta: 0:10:41  lr: 0.002648  min_lr: 0.002648  loss: 4.7795 (4.5534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6948 (0.8043)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [13]  [ 800/2502]  eta: 0:09:33  lr: 0.002664  min_lr: 0.002664  loss: 4.8574 (4.5517)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7274 (0.7884)  time: 0.3421  data: 0.0004  max mem: 27255
Epoch: [13]  [1000/2502]  eta: 0:08:25  lr: 0.002680  min_lr: 0.002680  loss: 4.8211 (4.5517)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6671 (0.7825)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [13]  [1200/2502]  eta: 0:07:17  lr: 0.002696  min_lr: 0.002696  loss: 4.3858 (4.5503)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6881 (0.7747)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [13]  [1400/2502]  eta: 0:06:09  lr: 0.002712  min_lr: 0.002712  loss: 4.6946 (4.5488)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6829 (0.7702)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [13]  [1600/2502]  eta: 0:05:02  lr: 0.002728  min_lr: 0.002728  loss: 4.4870 (4.5387)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7582 (0.7705)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [13]  [1800/2502]  eta: 0:03:55  lr: 0.002744  min_lr: 0.002744  loss: 4.5093 (4.5296)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7379 (0.7701)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [13]  [2000/2502]  eta: 0:02:48  lr: 0.002760  min_lr: 0.002760  loss: 4.7392 (4.5320)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7148 (0.7669)  time: 0.3409  data: 0.0004  max mem: 27255
Epoch: [13]  [2200/2502]  eta: 0:01:41  lr: 0.002776  min_lr: 0.002776  loss: 4.7883 (4.5272)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7273 (0.7645)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [13]  [2400/2502]  eta: 0:00:34  lr: 0.002792  min_lr: 0.002792  loss: 4.3796 (4.5281)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7145 (0.7645)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [13]  [2501/2502]  eta: 0:00:00  lr: 0.002800  min_lr: 0.002800  loss: 4.5764 (4.5291)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6744 (0.7626)  time: 0.3000  data: 0.0009  max mem: 27255
Epoch: [13] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.002800  min_lr: 0.002800  loss: 4.5764 (4.5306)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6744 (0.7626)
Test:  [ 0/50]  eta: 0:02:58  loss: 1.3953 (1.3953)  acc1: 77.2000 (77.2000)  acc5: 91.6000 (91.6000)  time: 3.5729  data: 3.3732  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 1.6869 (1.6970)  acc1: 67.2000 (66.0000)  acc5: 87.2000 (88.1455)  time: 0.5125  data: 0.3448  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.6217 (1.6412)  acc1: 67.2000 (66.2857)  acc5: 89.6000 (89.2191)  time: 0.1977  data: 0.0335  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.8390 (1.8464)  acc1: 59.2000 (63.0194)  acc5: 82.8000 (85.6129)  time: 0.1911  data: 0.0272  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.3491 (1.9826)  acc1: 52.8000 (60.4098)  acc5: 76.4000 (83.2781)  time: 0.2057  data: 0.0420  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.3491 (2.0183)  acc1: 52.8000 (59.6720)  acc5: 76.4000 (82.8720)  time: 0.2104  data: 0.0436  max mem: 27255
Test: Total time: 0:00:13 (0.2662 s / it)
* Acc@1 59.414 Acc@5 82.990 loss 2.026
Accuracy of the model on the 50000 test images: 59.4%
Max accuracy: 59.59%
Epoch: [14]  [   0/2502]  eta: 1:34:30  lr: 0.002800  min_lr: 0.002800  loss: 3.8221 (3.8221)  weight_decay: 0.0500 (0.0500)  time: 2.2665  data: 1.3297  max mem: 27255
Epoch: [14]  [ 200/2502]  eta: 0:13:13  lr: 0.002816  min_lr: 0.002816  loss: 4.8030 (4.5345)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7464 (0.7472)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [14]  [ 400/2502]  eta: 0:11:54  lr: 0.002832  min_lr: 0.002832  loss: 4.1330 (4.4852)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6648 (0.7187)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [14]  [ 600/2502]  eta: 0:10:43  lr: 0.002848  min_lr: 0.002848  loss: 4.9673 (4.4978)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7020 (inf)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [14]  [ 800/2502]  eta: 0:09:34  lr: 0.002864  min_lr: 0.002864  loss: 4.7925 (4.4843)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7166 (inf)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [14]  [1000/2502]  eta: 0:08:25  lr: 0.002880  min_lr: 0.002880  loss: 4.5490 (4.4647)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7074 (inf)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [14]  [1200/2502]  eta: 0:07:17  lr: 0.002896  min_lr: 0.002896  loss: 4.4846 (4.4614)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6888 (inf)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [14]  [1400/2502]  eta: 0:06:10  lr: 0.002912  min_lr: 0.002912  loss: 4.3833 (4.4633)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6890 (inf)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [14]  [1600/2502]  eta: 0:05:02  lr: 0.002928  min_lr: 0.002928  loss: 4.8087 (4.4650)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6921 (inf)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [14]  [1800/2502]  eta: 0:03:55  lr: 0.002944  min_lr: 0.002944  loss: 4.4504 (4.4609)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6974 (inf)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [14]  [2000/2502]  eta: 0:02:48  lr: 0.002960  min_lr: 0.002960  loss: 4.6133 (4.4621)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6347 (inf)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [14]  [2200/2502]  eta: 0:01:41  lr: 0.002976  min_lr: 0.002976  loss: 4.0993 (4.4573)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6728 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [14]  [2400/2502]  eta: 0:00:34  lr: 0.002992  min_lr: 0.002992  loss: 4.8571 (4.4595)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7109 (inf)  time: 0.3329  data: 0.0005  max mem: 27255
Epoch: [14]  [2501/2502]  eta: 0:00:00  lr: 0.003000  min_lr: 0.003000  loss: 4.3252 (4.4571)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6883 (inf)  time: 0.3003  data: 0.0009  max mem: 27255
Epoch: [14] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.003000  min_lr: 0.003000  loss: 4.3252 (4.4638)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6883 (inf)
Test:  [ 0/50]  eta: 0:02:10  loss: 1.0762 (1.0762)  acc1: 76.4000 (76.4000)  acc5: 95.2000 (95.2000)  time: 2.6166  data: 2.4188  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.4500 (1.4688)  acc1: 72.4000 (69.3455)  acc5: 89.6000 (89.7818)  time: 0.4065  data: 0.2389  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.4101 (1.4575)  acc1: 70.4000 (68.7619)  acc5: 89.6000 (89.8476)  time: 0.2057  data: 0.0414  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.5722 (1.6749)  acc1: 58.8000 (64.6968)  acc5: 85.6000 (86.4129)  time: 0.2141  data: 0.0503  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.1589 (1.8031)  acc1: 55.2000 (62.2244)  acc5: 78.4000 (84.5561)  time: 0.1943  data: 0.0307  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.1342 (1.8323)  acc1: 56.4000 (61.4160)  acc5: 80.0000 (84.2720)  time: 0.1829  data: 0.0194  max mem: 27255
Test: Total time: 0:00:12 (0.2436 s / it)
* Acc@1 61.002 Acc@5 84.200 loss 1.838
Accuracy of the model on the 50000 test images: 61.0%
Max accuracy: 61.00%
Epoch: [15]  [   0/2502]  eta: 1:34:53  lr: 0.003000  min_lr: 0.003000  loss: 4.8586 (4.8586)  weight_decay: 0.0500 (0.0500)  time: 2.2758  data: 1.9204  max mem: 27255
Epoch: [15]  [ 200/2502]  eta: 0:13:14  lr: 0.003016  min_lr: 0.003016  loss: 4.6447 (4.4603)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7109 (0.7125)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [15]  [ 400/2502]  eta: 0:11:52  lr: 0.003032  min_lr: 0.003032  loss: 4.7280 (4.4305)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6657 (0.7250)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [15]  [ 600/2502]  eta: 0:10:42  lr: 0.003048  min_lr: 0.003048  loss: 4.6175 (4.4153)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6470 (0.7171)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [15]  [ 800/2502]  eta: 0:09:33  lr: 0.003064  min_lr: 0.003064  loss: 4.2730 (4.4342)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6683 (0.7048)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [15]  [1000/2502]  eta: 0:08:25  lr: 0.003080  min_lr: 0.003080  loss: 4.3743 (4.4182)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6440 (0.6923)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [15]  [1200/2502]  eta: 0:07:17  lr: 0.003096  min_lr: 0.003096  loss: 4.4587 (4.4099)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6933 (0.6890)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [15]  [1400/2502]  eta: 0:06:10  lr: 0.003112  min_lr: 0.003112  loss: 4.7256 (4.4085)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6432 (0.6838)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [15]  [1600/2502]  eta: 0:05:02  lr: 0.003128  min_lr: 0.003128  loss: 4.6716 (4.4105)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6425 (0.6796)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [15]  [1800/2502]  eta: 0:03:55  lr: 0.003144  min_lr: 0.003144  loss: 4.0501 (4.4093)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6397 (0.6784)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [15]  [2000/2502]  eta: 0:02:48  lr: 0.003160  min_lr: 0.003160  loss: 4.4123 (4.4121)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6132 (0.6740)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [15]  [2200/2502]  eta: 0:01:41  lr: 0.003176  min_lr: 0.003176  loss: 4.6665 (4.4173)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6573 (0.6752)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [15]  [2400/2502]  eta: 0:00:34  lr: 0.003192  min_lr: 0.003192  loss: 4.0162 (4.4135)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6569 (0.6714)  time: 0.3428  data: 0.0004  max mem: 27255
Epoch: [15]  [2501/2502]  eta: 0:00:00  lr: 0.003200  min_lr: 0.003200  loss: 4.6763 (4.4141)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7381 (0.6729)  time: 0.3005  data: 0.0008  max mem: 27255
Epoch: [15] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.003200  min_lr: 0.003200  loss: 4.6763 (4.4183)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7381 (0.6729)
Test:  [ 0/50]  eta: 0:02:24  loss: 1.1835 (1.1835)  acc1: 80.8000 (80.8000)  acc5: 93.2000 (93.2000)  time: 2.8807  data: 2.6854  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.5165 (1.4858)  acc1: 70.0000 (69.9636)  acc5: 91.6000 (91.1273)  time: 0.4123  data: 0.2454  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.5165 (1.5019)  acc1: 68.0000 (68.9333)  acc5: 90.8000 (91.0095)  time: 0.1853  data: 0.0213  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.6916 (1.6993)  acc1: 61.6000 (65.3419)  acc5: 84.4000 (87.7161)  time: 0.2324  data: 0.0684  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.1695 (1.8163)  acc1: 56.8000 (63.0732)  acc5: 79.6000 (85.7756)  time: 0.2211  data: 0.0573  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.1695 (1.8430)  acc1: 56.4000 (62.3040)  acc5: 79.6000 (85.3520)  time: 0.1732  data: 0.0095  max mem: 27255
Test: Total time: 0:00:12 (0.2514 s / it)
* Acc@1 62.208 Acc@5 84.992 loss 1.845
Accuracy of the model on the 50000 test images: 62.2%
Max accuracy: 62.21%
Epoch: [16]  [   0/2502]  eta: 1:18:01  lr: 0.003200  min_lr: 0.003200  loss: 3.5201 (3.5201)  weight_decay: 0.0500 (0.0500)  time: 1.8709  data: 1.5204  max mem: 27255
Epoch: [16]  [ 200/2502]  eta: 0:13:06  lr: 0.003216  min_lr: 0.003216  loss: 4.2842 (4.3658)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6571 (0.6462)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [16]  [ 400/2502]  eta: 0:11:51  lr: 0.003232  min_lr: 0.003232  loss: 4.6091 (4.3980)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6615 (0.6531)  time: 0.3353  data: 0.0005  max mem: 27255
Epoch: [16]  [ 600/2502]  eta: 0:10:41  lr: 0.003248  min_lr: 0.003248  loss: 4.4339 (4.3985)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6155 (0.6501)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [16]  [ 800/2502]  eta: 0:09:32  lr: 0.003264  min_lr: 0.003264  loss: 4.3007 (4.3788)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6404 (0.6415)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [16]  [1000/2502]  eta: 0:08:24  lr: 0.003280  min_lr: 0.003280  loss: 4.5925 (4.3679)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5936 (0.6362)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [16]  [1200/2502]  eta: 0:07:17  lr: 0.003296  min_lr: 0.003296  loss: 4.5378 (4.3874)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6501 (0.6449)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [16]  [1400/2502]  eta: 0:06:09  lr: 0.003312  min_lr: 0.003312  loss: 4.3776 (4.3859)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6261 (0.6493)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [16]  [1600/2502]  eta: 0:05:02  lr: 0.003328  min_lr: 0.003328  loss: 4.4615 (4.3826)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6140 (0.6506)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [16]  [1800/2502]  eta: 0:03:55  lr: 0.003344  min_lr: 0.003344  loss: 4.5861 (4.3711)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6009 (0.6478)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [16]  [2000/2502]  eta: 0:02:48  lr: 0.003360  min_lr: 0.003360  loss: 4.6911 (4.3710)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5956 (0.6441)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [16]  [2200/2502]  eta: 0:01:41  lr: 0.003376  min_lr: 0.003376  loss: 4.6365 (4.3726)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5985 (0.6436)  time: 0.3363  data: 0.0004  max mem: 27255
Epoch: [16]  [2400/2502]  eta: 0:00:34  lr: 0.003392  min_lr: 0.003392  loss: 4.5705 (4.3718)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6392 (0.6445)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [16]  [2501/2502]  eta: 0:00:00  lr: 0.003400  min_lr: 0.003400  loss: 4.7487 (4.3729)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6189 (0.6434)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [16] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003400  min_lr: 0.003400  loss: 4.7487 (4.3757)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6189 (0.6434)
Test:  [ 0/50]  eta: 0:02:22  loss: 1.3651 (1.3651)  acc1: 78.8000 (78.8000)  acc5: 92.0000 (92.0000)  time: 2.8488  data: 2.6452  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.6601 (1.6326)  acc1: 72.0000 (68.2545)  acc5: 90.4000 (90.3273)  time: 0.4526  data: 0.2850  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.5643 (1.6058)  acc1: 66.8000 (68.8762)  acc5: 90.4000 (90.7810)  time: 0.2403  data: 0.0763  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.8877 (1.7956)  acc1: 61.2000 (65.3806)  acc5: 85.6000 (87.7290)  time: 0.2495  data: 0.0855  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.2620 (1.9182)  acc1: 54.4000 (62.5951)  acc5: 78.8000 (85.5512)  time: 0.2002  data: 0.0363  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.2620 (1.9362)  acc1: 54.8000 (62.2880)  acc5: 78.4000 (85.1680)  time: 0.1999  data: 0.0362  max mem: 27255
Test: Total time: 0:00:13 (0.2647 s / it)
* Acc@1 61.910 Acc@5 85.066 loss 1.941
Accuracy of the model on the 50000 test images: 61.9%
Max accuracy: 62.21%
Epoch: [17]  [   0/2502]  eta: 1:18:46  lr: 0.003400  min_lr: 0.003400  loss: 4.8734 (4.8734)  weight_decay: 0.0500 (0.0500)  time: 1.8890  data: 1.4986  max mem: 27255
Epoch: [17]  [ 200/2502]  eta: 0:13:09  lr: 0.003416  min_lr: 0.003416  loss: 4.1045 (4.3062)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5912 (0.6468)  time: 0.3364  data: 0.0004  max mem: 27255
Epoch: [17]  [ 400/2502]  eta: 0:11:52  lr: 0.003432  min_lr: 0.003432  loss: 4.3222 (4.3252)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6180 (0.6481)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [17]  [ 600/2502]  eta: 0:10:42  lr: 0.003448  min_lr: 0.003448  loss: 4.4089 (4.3235)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6300 (0.6427)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [17]  [ 800/2502]  eta: 0:09:33  lr: 0.003464  min_lr: 0.003464  loss: 4.4159 (4.3398)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6041 (0.6444)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [17]  [1000/2502]  eta: 0:08:25  lr: 0.003480  min_lr: 0.003480  loss: 4.2159 (4.3444)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6198 (0.6381)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [17]  [1200/2502]  eta: 0:07:17  lr: 0.003496  min_lr: 0.003496  loss: 4.5765 (4.3437)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6184 (0.6343)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [17]  [1400/2502]  eta: 0:06:09  lr: 0.003512  min_lr: 0.003512  loss: 4.3218 (4.3357)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5608 (0.6344)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [17]  [1600/2502]  eta: 0:05:02  lr: 0.003528  min_lr: 0.003528  loss: 4.5560 (4.3424)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5921 (0.6317)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [17]  [1800/2502]  eta: 0:03:55  lr: 0.003544  min_lr: 0.003544  loss: 4.3855 (4.3523)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6261 (0.6331)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [17]  [2000/2502]  eta: 0:02:48  lr: 0.003560  min_lr: 0.003560  loss: 4.5258 (4.3504)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5864 (0.6328)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [17]  [2200/2502]  eta: 0:01:41  lr: 0.003576  min_lr: 0.003576  loss: 4.3615 (4.3465)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6254 (0.6327)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [17]  [2400/2502]  eta: 0:00:34  lr: 0.003592  min_lr: 0.003592  loss: 4.3211 (4.3445)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6098 (0.6312)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [17]  [2501/2502]  eta: 0:00:00  lr: 0.003600  min_lr: 0.003600  loss: 4.4211 (4.3455)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5780 (0.6296)  time: 0.3010  data: 0.0009  max mem: 27255
Epoch: [17] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003600  min_lr: 0.003600  loss: 4.4211 (4.3472)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5780 (0.6296)
Test:  [ 0/50]  eta: 0:02:36  loss: 1.1713 (1.1713)  acc1: 81.2000 (81.2000)  acc5: 94.4000 (94.4000)  time: 3.1370  data: 2.9463  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.5187 (1.5309)  acc1: 70.4000 (70.9455)  acc5: 91.2000 (91.2364)  time: 0.4452  data: 0.2786  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.5187 (1.5464)  acc1: 69.6000 (70.1905)  acc5: 91.2000 (91.4286)  time: 0.2006  data: 0.0366  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.7830 (1.7188)  acc1: 61.6000 (66.7097)  acc5: 84.0000 (87.8968)  time: 0.2199  data: 0.0562  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.1651 (1.8379)  acc1: 56.0000 (63.6781)  acc5: 80.0000 (85.9902)  time: 0.2267  data: 0.0631  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.1651 (1.8599)  acc1: 56.0000 (63.0800)  acc5: 80.0000 (85.6160)  time: 0.2086  data: 0.0451  max mem: 27255
Test: Total time: 0:00:13 (0.2657 s / it)
* Acc@1 63.078 Acc@5 85.720 loss 1.863
Accuracy of the model on the 50000 test images: 63.1%
Max accuracy: 63.08%
Epoch: [18]  [   0/2502]  eta: 1:26:40  lr: 0.003600  min_lr: 0.003600  loss: 4.4267 (4.4267)  weight_decay: 0.0500 (0.0500)  time: 2.0783  data: 1.7374  max mem: 27255
Epoch: [18]  [ 200/2502]  eta: 0:13:11  lr: 0.003616  min_lr: 0.003616  loss: 4.2394 (4.3570)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5940 (0.6275)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [18]  [ 400/2502]  eta: 0:11:52  lr: 0.003632  min_lr: 0.003632  loss: 4.7370 (4.3355)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6044 (0.6120)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [18]  [ 600/2502]  eta: 0:10:41  lr: 0.003648  min_lr: 0.003648  loss: 4.4017 (4.3102)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6126 (0.6071)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [18]  [ 800/2502]  eta: 0:09:32  lr: 0.003664  min_lr: 0.003664  loss: 3.8099 (4.2985)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5778 (0.6162)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [18]  [1000/2502]  eta: 0:08:24  lr: 0.003680  min_lr: 0.003680  loss: 4.4276 (4.2927)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5793 (0.6196)  time: 0.3327  data: 0.0003  max mem: 27255
Epoch: [18]  [1200/2502]  eta: 0:07:16  lr: 0.003696  min_lr: 0.003696  loss: 4.5392 (4.3034)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5794 (0.6170)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [18]  [1400/2502]  eta: 0:06:09  lr: 0.003712  min_lr: 0.003712  loss: 4.3831 (4.3079)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5710 (0.6172)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [18]  [1600/2502]  eta: 0:05:02  lr: 0.003728  min_lr: 0.003728  loss: 4.3322 (4.3035)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5832 (0.6156)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [18]  [1800/2502]  eta: 0:03:55  lr: 0.003744  min_lr: 0.003744  loss: 4.7371 (4.3030)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5783 (0.6149)  time: 0.3347  data: 0.0003  max mem: 27255
Epoch: [18]  [2000/2502]  eta: 0:02:48  lr: 0.003760  min_lr: 0.003760  loss: 4.3755 (4.3076)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5381 (0.6158)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [18]  [2200/2502]  eta: 0:01:41  lr: 0.003776  min_lr: 0.003776  loss: 4.1605 (4.3055)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5842 (0.6136)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [18]  [2400/2502]  eta: 0:00:34  lr: 0.003792  min_lr: 0.003792  loss: 4.3761 (4.3084)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6264 (0.6137)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [18]  [2501/2502]  eta: 0:00:00  lr: 0.003800  min_lr: 0.003800  loss: 3.9452 (4.3095)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6311 (0.6152)  time: 0.3007  data: 0.0007  max mem: 27255
Epoch: [18] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003800  min_lr: 0.003800  loss: 3.9452 (4.3122)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6311 (0.6152)
Test:  [ 0/50]  eta: 0:02:28  loss: 1.1179 (1.1179)  acc1: 80.4000 (80.4000)  acc5: 94.8000 (94.8000)  time: 2.9678  data: 2.7640  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.4050 (1.5012)  acc1: 73.6000 (70.6182)  acc5: 91.2000 (91.0545)  time: 0.4251  data: 0.2574  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.4050 (1.4671)  acc1: 68.8000 (70.4571)  acc5: 91.6000 (91.2952)  time: 0.2144  data: 0.0504  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.5565 (1.6497)  acc1: 64.4000 (67.0581)  acc5: 86.8000 (88.1419)  time: 0.2247  data: 0.0607  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.0990 (1.7591)  acc1: 57.6000 (64.4585)  acc5: 80.4000 (86.2634)  time: 0.1965  data: 0.0297  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.0950 (1.7924)  acc1: 57.2000 (63.6320)  acc5: 80.4000 (85.8160)  time: 0.1828  data: 0.0161  max mem: 27255
Test: Total time: 0:00:12 (0.2553 s / it)
* Acc@1 63.296 Acc@5 85.780 loss 1.795
Accuracy of the model on the 50000 test images: 63.3%
Max accuracy: 63.30%
Epoch: [19]  [   0/2502]  eta: 1:29:38  lr: 0.003800  min_lr: 0.003800  loss: 5.1545 (5.1545)  weight_decay: 0.0500 (0.0500)  time: 2.1497  data: 1.7942  max mem: 27255
Epoch: [19]  [ 200/2502]  eta: 0:13:10  lr: 0.003816  min_lr: 0.003816  loss: 4.6557 (4.2958)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5742 (0.5875)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [19]  [ 400/2502]  eta: 0:11:52  lr: 0.003832  min_lr: 0.003832  loss: 4.3566 (4.3292)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5872 (0.5919)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [19]  [ 600/2502]  eta: 0:10:42  lr: 0.003848  min_lr: 0.003848  loss: 3.8366 (4.3039)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5781 (0.6080)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [19]  [ 800/2502]  eta: 0:09:33  lr: 0.003864  min_lr: 0.003864  loss: 4.5143 (4.3117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5623 (0.5999)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [19]  [1000/2502]  eta: 0:08:25  lr: 0.003880  min_lr: 0.003880  loss: 4.1462 (4.3160)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6359 (0.5987)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [19]  [1200/2502]  eta: 0:07:17  lr: 0.003896  min_lr: 0.003896  loss: 4.0917 (4.3053)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5906 (0.5992)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [19]  [1400/2502]  eta: 0:06:10  lr: 0.003912  min_lr: 0.003912  loss: 4.6517 (4.3035)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5782 (0.6049)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [19]  [1600/2502]  eta: 0:05:02  lr: 0.003928  min_lr: 0.003928  loss: 4.1386 (4.3063)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5638 (0.6015)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [19]  [1800/2502]  eta: 0:03:55  lr: 0.003944  min_lr: 0.003944  loss: 4.1640 (4.3091)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6000 (0.6022)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [19]  [2000/2502]  eta: 0:02:48  lr: 0.003960  min_lr: 0.003960  loss: 4.2682 (4.3091)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6516 (0.6034)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [19]  [2200/2502]  eta: 0:01:41  lr: 0.003976  min_lr: 0.003976  loss: 4.4239 (4.3036)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5443 (0.6023)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [19]  [2400/2502]  eta: 0:00:34  lr: 0.003992  min_lr: 0.003992  loss: 4.5414 (4.3027)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5378 (0.5970)  time: 0.3330  data: 0.0003  max mem: 27255
Epoch: [19]  [2501/2502]  eta: 0:00:00  lr: 0.004000  min_lr: 0.004000  loss: 4.4075 (4.3056)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5664 (0.5975)  time: 0.3003  data: 0.0008  max mem: 27255
Epoch: [19] Total time: 0:13:58 (0.3349 s / it)
Averaged stats: lr: 0.004000  min_lr: 0.004000  loss: 4.4075 (4.2899)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5664 (0.5975)
Test:  [ 0/50]  eta: 0:02:14  loss: 1.1219 (1.1219)  acc1: 81.2000 (81.2000)  acc5: 94.0000 (94.0000)  time: 2.6853  data: 2.4851  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.4219 (1.3763)  acc1: 74.0000 (72.4364)  acc5: 92.8000 (92.0727)  time: 0.4307  data: 0.2637  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.4662 (1.4217)  acc1: 69.2000 (71.2191)  acc5: 92.0000 (92.0000)  time: 0.1990  data: 0.0353  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.6720 (1.6256)  acc1: 64.8000 (67.6903)  acc5: 85.2000 (88.8129)  time: 0.2143  data: 0.0506  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.0816 (1.7399)  acc1: 58.8000 (65.2781)  acc5: 81.2000 (86.9073)  time: 0.2262  data: 0.0626  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.0411 (1.7746)  acc1: 59.6000 (64.6800)  acc5: 81.6000 (86.4880)  time: 0.1901  data: 0.0266  max mem: 27255
Test: Total time: 0:00:12 (0.2550 s / it)
* Acc@1 64.414 Acc@5 86.356 loss 1.782
Accuracy of the model on the 50000 test images: 64.4%
Max accuracy: 64.41%
Epoch: [20]  [   0/2502]  eta: 1:24:07  lr: 0.004000  min_lr: 0.004000  loss: 3.4483 (3.4483)  weight_decay: 0.0500 (0.0500)  time: 2.0175  data: 1.6603  max mem: 27255
Epoch: [20]  [ 200/2502]  eta: 0:13:08  lr: 0.004000  min_lr: 0.004000  loss: 4.3425 (4.1994)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5931 (0.6277)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [20]  [ 400/2502]  eta: 0:11:53  lr: 0.004000  min_lr: 0.004000  loss: 4.6373 (4.2401)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5357 (0.5987)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [20]  [ 600/2502]  eta: 0:10:42  lr: 0.004000  min_lr: 0.004000  loss: 4.1862 (4.2802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5519 (0.5882)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [20]  [ 800/2502]  eta: 0:09:32  lr: 0.004000  min_lr: 0.004000  loss: 4.0644 (4.2703)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5218 (0.5852)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [20]  [1000/2502]  eta: 0:08:24  lr: 0.004000  min_lr: 0.004000  loss: 3.9729 (4.2818)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5899 (0.5882)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [20]  [1200/2502]  eta: 0:07:16  lr: 0.004000  min_lr: 0.004000  loss: 4.3255 (4.2816)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5946 (0.5884)  time: 0.3328  data: 0.0005  max mem: 27255
Epoch: [20]  [1400/2502]  eta: 0:06:09  lr: 0.004000  min_lr: 0.004000  loss: 4.4285 (4.2674)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5556 (0.5867)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [20]  [1600/2502]  eta: 0:05:02  lr: 0.004000  min_lr: 0.004000  loss: 4.4197 (4.2689)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5567 (0.5873)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [20]  [1800/2502]  eta: 0:03:55  lr: 0.004000  min_lr: 0.004000  loss: 4.1860 (4.2643)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5578 (0.5857)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [20]  [2000/2502]  eta: 0:02:48  lr: 0.004000  min_lr: 0.004000  loss: 4.2501 (4.2668)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5894 (0.5875)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [20]  [2200/2502]  eta: 0:01:41  lr: 0.004000  min_lr: 0.004000  loss: 4.5105 (4.2695)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5922 (inf)  time: 0.3328  data: 0.0005  max mem: 27255
Epoch: [20]  [2400/2502]  eta: 0:00:34  lr: 0.004000  min_lr: 0.004000  loss: 4.4733 (4.2706)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5695 (inf)  time: 0.3329  data: 0.0005  max mem: 27255
Epoch: [20]  [2501/2502]  eta: 0:00:00  lr: 0.004000  min_lr: 0.004000  loss: 4.0410 (4.2718)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5715 (inf)  time: 0.3016  data: 0.0009  max mem: 27255
Epoch: [20] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.004000  min_lr: 0.004000  loss: 4.0410 (4.2541)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5715 (inf)
Test:  [ 0/50]  eta: 0:02:13  loss: 1.0876 (1.0876)  acc1: 83.6000 (83.6000)  acc5: 94.4000 (94.4000)  time: 2.6702  data: 2.4770  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.3631 (1.3803)  acc1: 72.0000 (72.1455)  acc5: 93.2000 (92.5091)  time: 0.4278  data: 0.2612  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.3255 (1.3685)  acc1: 70.4000 (71.6762)  acc5: 92.0000 (92.4191)  time: 0.1949  data: 0.0309  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.5717 (1.5618)  acc1: 66.8000 (67.8581)  acc5: 85.6000 (89.1613)  time: 0.1946  data: 0.0297  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.0422 (1.6754)  acc1: 58.8000 (65.5024)  acc5: 80.8000 (87.3756)  time: 0.2099  data: 0.0450  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 2.0422 (1.7086)  acc1: 58.8000 (64.7840)  acc5: 80.0000 (86.8800)  time: 0.2124  data: 0.0467  max mem: 27255
Test: Total time: 0:00:12 (0.2555 s / it)
* Acc@1 64.650 Acc@5 86.588 loss 1.718
Accuracy of the model on the 50000 test images: 64.7%
Max accuracy: 64.65%
Epoch: [21]  [   0/2502]  eta: 1:21:45  lr: 0.004000  min_lr: 0.004000  loss: 4.7108 (4.7108)  weight_decay: 0.0500 (0.0500)  time: 1.9605  data: 1.6228  max mem: 27255
Epoch: [21]  [ 200/2502]  eta: 0:13:10  lr: 0.004000  min_lr: 0.004000  loss: 4.3729 (4.2278)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5602 (0.5853)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [21]  [ 400/2502]  eta: 0:11:51  lr: 0.004000  min_lr: 0.004000  loss: 4.1954 (4.2102)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5855 (0.5956)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [21]  [ 600/2502]  eta: 0:10:40  lr: 0.004000  min_lr: 0.004000  loss: 4.4693 (4.2081)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5504 (0.5954)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [21]  [ 800/2502]  eta: 0:09:32  lr: 0.004000  min_lr: 0.004000  loss: 4.1024 (4.1990)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5643 (0.5971)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [21]  [1000/2502]  eta: 0:08:24  lr: 0.004000  min_lr: 0.004000  loss: 4.3214 (4.1909)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5299 (0.5979)  time: 0.3388  data: 0.0004  max mem: 27255
Epoch: [21]  [1200/2502]  eta: 0:07:17  lr: 0.004000  min_lr: 0.004000  loss: 4.4608 (4.1976)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5279 (0.5947)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [21]  [1400/2502]  eta: 0:06:10  lr: 0.004000  min_lr: 0.004000  loss: 4.4763 (4.2045)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5779 (0.5951)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [21]  [1600/2502]  eta: 0:05:02  lr: 0.004000  min_lr: 0.004000  loss: 4.3268 (4.2014)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5740 (0.5947)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [21]  [1800/2502]  eta: 0:03:55  lr: 0.004000  min_lr: 0.004000  loss: 4.2722 (4.2038)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5549 (0.5945)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [21]  [2000/2502]  eta: 0:02:48  lr: 0.004000  min_lr: 0.004000  loss: 4.1389 (4.1969)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5769 (0.5950)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [21]  [2200/2502]  eta: 0:01:41  lr: 0.004000  min_lr: 0.004000  loss: 3.8778 (4.1966)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5576 (0.5940)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [21]  [2400/2502]  eta: 0:00:34  lr: 0.004000  min_lr: 0.004000  loss: 4.4390 (4.1952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5937 (0.5952)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [21]  [2501/2502]  eta: 0:00:00  lr: 0.003999  min_lr: 0.003999  loss: 4.3991 (4.1933)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5743 (0.5949)  time: 0.3016  data: 0.0009  max mem: 27255
Epoch: [21] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.003999  min_lr: 0.003999  loss: 4.3991 (4.2146)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5743 (0.5949)
Test:  [ 0/50]  eta: 0:02:26  loss: 1.0725 (1.0725)  acc1: 84.0000 (84.0000)  acc5: 97.2000 (97.2000)  time: 2.9388  data: 2.7409  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.5281 (1.4507)  acc1: 74.0000 (72.6909)  acc5: 91.6000 (91.8545)  time: 0.4391  data: 0.2656  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.3598 (1.4303)  acc1: 70.4000 (71.6952)  acc5: 92.0000 (92.5143)  time: 0.2216  data: 0.0540  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.7019 (1.5978)  acc1: 66.0000 (68.4000)  acc5: 88.4000 (89.6516)  time: 0.2613  data: 0.0950  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.9882 (1.7108)  acc1: 59.2000 (66.0000)  acc5: 82.4000 (87.9024)  time: 0.2269  data: 0.0583  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.9670 (1.7271)  acc1: 59.6000 (65.4880)  acc5: 83.2000 (87.6000)  time: 0.1750  data: 0.0084  max mem: 27255
Test: Total time: 0:00:13 (0.2695 s / it)
* Acc@1 65.232 Acc@5 87.240 loss 1.732
Accuracy of the model on the 50000 test images: 65.2%
Max accuracy: 65.23%
Epoch: [22]  [   0/2502]  eta: 1:21:56  lr: 0.003999  min_lr: 0.003999  loss: 4.0722 (4.0722)  weight_decay: 0.0500 (0.0500)  time: 1.9651  data: 1.6106  max mem: 27255
Epoch: [22]  [ 200/2502]  eta: 0:13:07  lr: 0.003999  min_lr: 0.003999  loss: 4.4646 (4.1899)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5859 (0.6238)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [22]  [ 400/2502]  eta: 0:11:50  lr: 0.003999  min_lr: 0.003999  loss: 4.0434 (4.1918)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5351 (0.6043)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [22]  [ 600/2502]  eta: 0:10:40  lr: 0.003999  min_lr: 0.003999  loss: 4.4778 (4.2068)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5602 (0.5945)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [22]  [ 800/2502]  eta: 0:09:32  lr: 0.003999  min_lr: 0.003999  loss: 4.0310 (4.1932)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5356 (0.5857)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [22]  [1000/2502]  eta: 0:08:24  lr: 0.003999  min_lr: 0.003999  loss: 4.4196 (4.1955)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5996 (0.5871)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [22]  [1200/2502]  eta: 0:07:16  lr: 0.003999  min_lr: 0.003999  loss: 4.5398 (4.1884)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5525 (0.5834)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [22]  [1400/2502]  eta: 0:06:09  lr: 0.003999  min_lr: 0.003999  loss: 4.4499 (4.1997)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6030 (0.5849)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [22]  [1600/2502]  eta: 0:05:02  lr: 0.003999  min_lr: 0.003999  loss: 4.4841 (4.1986)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5903 (0.5879)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [22]  [1800/2502]  eta: 0:03:55  lr: 0.003999  min_lr: 0.003999  loss: 4.3173 (4.1976)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5722 (0.5887)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [22]  [2000/2502]  eta: 0:02:48  lr: 0.003999  min_lr: 0.003999  loss: 4.0737 (4.1943)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5378 (0.5879)  time: 0.3392  data: 0.0004  max mem: 27255
Epoch: [22]  [2200/2502]  eta: 0:01:41  lr: 0.003999  min_lr: 0.003999  loss: 4.2499 (4.1895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5608 (0.5870)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [22]  [2400/2502]  eta: 0:00:34  lr: 0.003999  min_lr: 0.003999  loss: 4.5189 (4.1860)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6085 (0.5866)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [22]  [2501/2502]  eta: 0:00:00  lr: 0.003999  min_lr: 0.003999  loss: 4.1448 (4.1827)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5786 (0.5868)  time: 0.2998  data: 0.0009  max mem: 27255
Epoch: [22] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.003999  min_lr: 0.003999  loss: 4.1448 (4.1740)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5786 (0.5868)
Test:  [ 0/50]  eta: 0:02:18  loss: 0.9211 (0.9211)  acc1: 82.4000 (82.4000)  acc5: 95.2000 (95.2000)  time: 2.7700  data: 2.5686  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.2525 (1.2801)  acc1: 76.0000 (74.3273)  acc5: 93.6000 (92.6546)  time: 0.4103  data: 0.2432  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.2525 (1.2780)  acc1: 74.8000 (73.7714)  acc5: 93.2000 (92.8381)  time: 0.1806  data: 0.0165  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.5078 (1.4795)  acc1: 64.8000 (69.9484)  acc5: 87.6000 (89.9355)  time: 0.2285  data: 0.0642  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.9769 (1.6037)  acc1: 60.4000 (67.5220)  acc5: 82.4000 (88.4390)  time: 0.2463  data: 0.0826  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.9761 (1.6336)  acc1: 59.6000 (66.8240)  acc5: 82.8000 (88.1360)  time: 0.1932  data: 0.0297  max mem: 27255
Test: Total time: 0:00:12 (0.2577 s / it)
* Acc@1 66.416 Acc@5 87.968 loss 1.644
Accuracy of the model on the 50000 test images: 66.4%
Max accuracy: 66.42%
Epoch: [23]  [   0/2502]  eta: 1:12:48  lr: 0.003999  min_lr: 0.003999  loss: 4.7274 (4.7274)  weight_decay: 0.0500 (0.0500)  time: 1.7462  data: 1.3994  max mem: 27255
Epoch: [23]  [ 200/2502]  eta: 0:13:05  lr: 0.003999  min_lr: 0.003999  loss: 4.3324 (4.2155)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6174 (0.6003)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [23]  [ 400/2502]  eta: 0:11:51  lr: 0.003999  min_lr: 0.003999  loss: 4.4110 (4.1514)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5442 (0.5894)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [23]  [ 600/2502]  eta: 0:10:41  lr: 0.003999  min_lr: 0.003999  loss: 4.2397 (4.1699)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5852 (0.5965)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [23]  [ 800/2502]  eta: 0:09:32  lr: 0.003999  min_lr: 0.003999  loss: 3.9173 (4.1579)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5414 (0.5926)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [23]  [1000/2502]  eta: 0:08:24  lr: 0.003999  min_lr: 0.003999  loss: 4.3585 (4.1606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5643 (0.5936)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [23]  [1200/2502]  eta: 0:07:17  lr: 0.003998  min_lr: 0.003998  loss: 4.4716 (4.1473)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6112 (0.5944)  time: 0.3412  data: 0.0004  max mem: 27255
Epoch: [23]  [1400/2502]  eta: 0:06:09  lr: 0.003998  min_lr: 0.003998  loss: 4.2309 (4.1508)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6023 (0.5910)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [23]  [1600/2502]  eta: 0:05:02  lr: 0.003998  min_lr: 0.003998  loss: 3.9882 (4.1498)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5809 (0.5887)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [23]  [1800/2502]  eta: 0:03:55  lr: 0.003998  min_lr: 0.003998  loss: 3.8559 (4.1492)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5750 (0.5893)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [23]  [2000/2502]  eta: 0:02:48  lr: 0.003998  min_lr: 0.003998  loss: 4.3890 (4.1535)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5714 (0.5877)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [23]  [2200/2502]  eta: 0:01:41  lr: 0.003998  min_lr: 0.003998  loss: 3.7692 (4.1509)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5971 (0.5911)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [23]  [2400/2502]  eta: 0:00:34  lr: 0.003998  min_lr: 0.003998  loss: 4.2209 (4.1501)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5810 (0.5902)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [23]  [2501/2502]  eta: 0:00:00  lr: 0.003998  min_lr: 0.003998  loss: 4.4015 (4.1511)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5532 (0.5886)  time: 0.3003  data: 0.0007  max mem: 27255
Epoch: [23] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003998  min_lr: 0.003998  loss: 4.4015 (4.1524)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5532 (0.5886)
Test:  [ 0/50]  eta: 0:02:38  loss: 1.2349 (1.2349)  acc1: 83.2000 (83.2000)  acc5: 95.6000 (95.6000)  time: 3.1798  data: 2.9858  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.4553 (1.4970)  acc1: 76.0000 (73.2000)  acc5: 92.8000 (92.5091)  time: 0.4577  data: 0.2904  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.4834 (1.4839)  acc1: 71.2000 (72.8191)  acc5: 92.4000 (92.8952)  time: 0.1984  data: 0.0341  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.6978 (1.6555)  acc1: 66.0000 (69.0710)  acc5: 88.4000 (89.7548)  time: 0.2206  data: 0.0567  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 2.0353 (1.7503)  acc1: 60.0000 (66.9756)  acc5: 83.2000 (88.1268)  time: 0.2098  data: 0.0461  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.9580 (1.7688)  acc1: 61.2000 (66.3040)  acc5: 84.0000 (87.8880)  time: 0.2031  data: 0.0391  max mem: 27255
Test: Total time: 0:00:12 (0.2586 s / it)
* Acc@1 66.258 Acc@5 87.822 loss 1.777
Accuracy of the model on the 50000 test images: 66.3%
Max accuracy: 66.42%
Epoch: [24]  [   0/2502]  eta: 1:24:32  lr: 0.003998  min_lr: 0.003998  loss: 4.5973 (4.5973)  weight_decay: 0.0500 (0.0500)  time: 2.0276  data: 1.3444  max mem: 27255
Epoch: [24]  [ 200/2502]  eta: 0:13:09  lr: 0.003998  min_lr: 0.003998  loss: 4.0280 (4.1635)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5535 (0.5757)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [24]  [ 400/2502]  eta: 0:11:51  lr: 0.003998  min_lr: 0.003998  loss: 4.3769 (4.1419)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6920 (0.5897)  time: 0.3351  data: 0.0003  max mem: 27255
Epoch: [24]  [ 600/2502]  eta: 0:10:40  lr: 0.003998  min_lr: 0.003998  loss: 4.2074 (4.1493)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6014 (0.5843)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [24]  [ 800/2502]  eta: 0:09:33  lr: 0.003998  min_lr: 0.003998  loss: 4.3461 (4.1508)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5186 (inf)  time: 0.3331  data: 0.0003  max mem: 27255
Epoch: [24]  [1000/2502]  eta: 0:08:25  lr: 0.003998  min_lr: 0.003998  loss: 4.1093 (4.1499)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5940 (inf)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [24]  [1200/2502]  eta: 0:07:17  lr: 0.003997  min_lr: 0.003997  loss: 3.8132 (4.1359)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5768 (inf)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [24]  [1400/2502]  eta: 0:06:09  lr: 0.003997  min_lr: 0.003997  loss: 4.1133 (4.1322)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6072 (inf)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [24]  [1600/2502]  eta: 0:05:02  lr: 0.003997  min_lr: 0.003997  loss: 4.1178 (4.1253)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5236 (inf)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [24]  [1800/2502]  eta: 0:03:55  lr: 0.003997  min_lr: 0.003997  loss: 4.2446 (4.1253)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5589 (inf)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [24]  [2000/2502]  eta: 0:02:48  lr: 0.003997  min_lr: 0.003997  loss: 4.2897 (4.1277)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5560 (inf)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [24]  [2200/2502]  eta: 0:01:41  lr: 0.003997  min_lr: 0.003997  loss: 4.2293 (4.1248)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5487 (inf)  time: 0.3416  data: 0.0004  max mem: 27255
Epoch: [24]  [2400/2502]  eta: 0:00:34  lr: 0.003997  min_lr: 0.003997  loss: 4.1499 (4.1202)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5746 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [24]  [2501/2502]  eta: 0:00:00  lr: 0.003997  min_lr: 0.003997  loss: 4.0908 (4.1184)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5486 (inf)  time: 0.2997  data: 0.0007  max mem: 27255
Epoch: [24] Total time: 0:13:57 (0.3346 s / it)
Averaged stats: lr: 0.003997  min_lr: 0.003997  loss: 4.0908 (4.1217)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5486 (inf)
Test:  [ 0/50]  eta: 0:03:03  loss: 1.0451 (1.0451)  acc1: 88.4000 (88.4000)  acc5: 98.0000 (98.0000)  time: 3.6732  data: 3.4808  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 1.3292 (1.3503)  acc1: 77.6000 (75.4545)  acc5: 92.4000 (93.2000)  time: 0.5148  data: 0.3482  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.3292 (1.3394)  acc1: 71.2000 (74.3429)  acc5: 93.6000 (93.3333)  time: 0.1992  data: 0.0353  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.4171 (1.5039)  acc1: 68.8000 (70.8903)  acc5: 88.8000 (90.5806)  time: 0.2255  data: 0.0617  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.9690 (1.6226)  acc1: 61.2000 (68.4293)  acc5: 83.6000 (88.7805)  time: 0.2315  data: 0.0677  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.9884 (1.6574)  acc1: 60.4000 (67.5760)  acc5: 83.6000 (88.3600)  time: 0.1881  data: 0.0244  max mem: 27255
Test: Total time: 0:00:13 (0.2775 s / it)
* Acc@1 67.400 Acc@5 88.482 loss 1.654
Accuracy of the model on the 50000 test images: 67.4%
Max accuracy: 67.40%
Epoch: [25]  [   0/2502]  eta: 1:26:20  lr: 0.003997  min_lr: 0.003997  loss: 3.9324 (3.9324)  weight_decay: 0.0500 (0.0500)  time: 2.0706  data: 1.7323  max mem: 27255
Epoch: [25]  [ 200/2502]  eta: 0:13:07  lr: 0.003997  min_lr: 0.003997  loss: 4.2981 (4.0920)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5318 (0.5911)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [25]  [ 400/2502]  eta: 0:11:50  lr: 0.003997  min_lr: 0.003997  loss: 4.4795 (4.1189)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5969 (0.5856)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [25]  [ 600/2502]  eta: 0:10:39  lr: 0.003997  min_lr: 0.003997  loss: 4.4571 (4.1302)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6455 (0.5864)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [25]  [ 800/2502]  eta: 0:09:31  lr: 0.003996  min_lr: 0.003996  loss: 4.0905 (4.1170)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5639 (0.5803)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [25]  [1000/2502]  eta: 0:08:24  lr: 0.003996  min_lr: 0.003996  loss: 4.1962 (4.1266)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5200 (0.5771)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [25]  [1200/2502]  eta: 0:07:16  lr: 0.003996  min_lr: 0.003996  loss: 4.2522 (4.1221)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6023 (0.5830)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [25]  [1400/2502]  eta: 0:06:09  lr: 0.003996  min_lr: 0.003996  loss: 4.1446 (4.1263)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5403 (0.5870)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [25]  [1600/2502]  eta: 0:05:02  lr: 0.003996  min_lr: 0.003996  loss: 4.2065 (4.1237)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6540 (0.5892)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [25]  [1800/2502]  eta: 0:03:55  lr: 0.003996  min_lr: 0.003996  loss: 4.2470 (4.1165)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5597 (0.5887)  time: 0.3433  data: 0.0007  max mem: 27255
Epoch: [25]  [2000/2502]  eta: 0:02:48  lr: 0.003996  min_lr: 0.003996  loss: 4.0409 (4.1170)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6054 (0.5910)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [25]  [2200/2502]  eta: 0:01:41  lr: 0.003996  min_lr: 0.003996  loss: 4.3181 (4.1113)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5868 (0.5914)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [25]  [2400/2502]  eta: 0:00:34  lr: 0.003996  min_lr: 0.003996  loss: 4.2452 (4.1065)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5518 (0.5892)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [25]  [2501/2502]  eta: 0:00:00  lr: 0.003995  min_lr: 0.003995  loss: 4.0247 (4.1064)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5847 (0.5894)  time: 0.3001  data: 0.0009  max mem: 27255
Epoch: [25] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003995  min_lr: 0.003995  loss: 4.0247 (4.0975)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5847 (0.5894)
Test:  [ 0/50]  eta: 0:02:07  loss: 1.1018 (1.1018)  acc1: 84.8000 (84.8000)  acc5: 96.4000 (96.4000)  time: 2.5564  data: 2.3622  max mem: 27255
Test:  [10/50]  eta: 0:00:15  loss: 1.3583 (1.3465)  acc1: 76.4000 (75.7091)  acc5: 93.6000 (93.2000)  time: 0.3924  data: 0.2256  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.3292 (1.3321)  acc1: 76.0000 (74.8191)  acc5: 93.6000 (93.4857)  time: 0.2063  data: 0.0404  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.4772 (1.5029)  acc1: 66.0000 (71.0323)  acc5: 88.0000 (90.6581)  time: 0.2312  data: 0.0654  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.9387 (1.6099)  acc1: 62.8000 (68.4683)  acc5: 84.0000 (89.0439)  time: 0.2167  data: 0.0521  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.9517 (1.6353)  acc1: 61.6000 (67.8000)  acc5: 83.6000 (88.6000)  time: 0.1868  data: 0.0224  max mem: 27255
Test: Total time: 0:00:12 (0.2520 s / it)
* Acc@1 67.442 Acc@5 88.720 loss 1.639
Accuracy of the model on the 50000 test images: 67.4%
Max accuracy: 67.44%
Epoch: [26]  [   0/2502]  eta: 1:27:33  lr: 0.003995  min_lr: 0.003995  loss: 4.3890 (4.3890)  weight_decay: 0.0500 (0.0500)  time: 2.0999  data: 1.7552  max mem: 27255
Epoch: [26]  [ 200/2502]  eta: 0:13:09  lr: 0.003995  min_lr: 0.003995  loss: 4.3615 (4.1203)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5177 (0.5674)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [26]  [ 400/2502]  eta: 0:11:50  lr: 0.003995  min_lr: 0.003995  loss: 4.1608 (4.0938)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6031 (0.5783)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [26]  [ 600/2502]  eta: 0:10:40  lr: 0.003995  min_lr: 0.003995  loss: 3.8899 (4.1199)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6199 (0.5938)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [26]  [ 800/2502]  eta: 0:09:32  lr: 0.003995  min_lr: 0.003995  loss: 4.3355 (4.1010)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5563 (0.5941)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [26]  [1000/2502]  eta: 0:08:24  lr: 0.003995  min_lr: 0.003995  loss: 4.3360 (4.0932)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5817 (0.5897)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [26]  [1200/2502]  eta: 0:07:17  lr: 0.003995  min_lr: 0.003995  loss: 4.3893 (4.1036)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6196 (0.5977)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [26]  [1400/2502]  eta: 0:06:09  lr: 0.003995  min_lr: 0.003995  loss: 4.3450 (4.0986)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5981 (0.5952)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [26]  [1600/2502]  eta: 0:05:02  lr: 0.003994  min_lr: 0.003994  loss: 4.2553 (4.0921)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6105 (0.5942)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [26]  [1800/2502]  eta: 0:03:55  lr: 0.003994  min_lr: 0.003994  loss: 4.1381 (4.0916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5570 (0.5928)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [26]  [2000/2502]  eta: 0:02:48  lr: 0.003994  min_lr: 0.003994  loss: 4.1629 (4.0958)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5597 (0.5907)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [26]  [2200/2502]  eta: 0:01:41  lr: 0.003994  min_lr: 0.003994  loss: 4.0263 (4.0895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5537 (0.5898)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [26]  [2400/2502]  eta: 0:00:34  lr: 0.003994  min_lr: 0.003994  loss: 4.0530 (4.0836)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5439 (0.5875)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [26]  [2501/2502]  eta: 0:00:00  lr: 0.003994  min_lr: 0.003994  loss: 4.2316 (4.0822)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5835 (0.5883)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [26] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.003994  min_lr: 0.003994  loss: 4.2316 (4.0717)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5835 (0.5883)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.9052 (0.9052)  acc1: 86.0000 (86.0000)  acc5: 95.6000 (95.6000)  time: 2.6487  data: 2.4553  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.1916 (1.2417)  acc1: 77.6000 (76.0364)  acc5: 94.0000 (93.9636)  time: 0.4103  data: 0.2438  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.2622 (1.2831)  acc1: 74.0000 (74.6476)  acc5: 94.0000 (93.8286)  time: 0.1889  data: 0.0252  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.5184 (1.4747)  acc1: 66.8000 (70.9806)  acc5: 88.8000 (91.0194)  time: 0.2089  data: 0.0452  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.9329 (1.5857)  acc1: 62.8000 (68.6537)  acc5: 83.6000 (89.3659)  time: 0.2271  data: 0.0634  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.8924 (1.6083)  acc1: 62.8000 (68.2880)  acc5: 84.4000 (89.1200)  time: 0.2032  data: 0.0396  max mem: 27255
Test: Total time: 0:00:12 (0.2537 s / it)
* Acc@1 67.852 Acc@5 88.994 loss 1.623
Accuracy of the model on the 50000 test images: 67.9%
Max accuracy: 67.85%
Epoch: [27]  [   0/2502]  eta: 1:16:15  lr: 0.003994  min_lr: 0.003994  loss: 4.1546 (4.1546)  weight_decay: 0.0500 (0.0500)  time: 1.8289  data: 1.4755  max mem: 27255
Epoch: [27]  [ 200/2502]  eta: 0:13:09  lr: 0.003994  min_lr: 0.003994  loss: 4.1556 (4.0464)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6338 (0.6089)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [27]  [ 400/2502]  eta: 0:11:53  lr: 0.003994  min_lr: 0.003994  loss: 4.1436 (4.0432)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6158 (0.6103)  time: 0.3358  data: 0.0005  max mem: 27255
Epoch: [27]  [ 600/2502]  eta: 0:10:42  lr: 0.003993  min_lr: 0.003993  loss: 4.3549 (4.0356)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5532 (0.6053)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [27]  [ 800/2502]  eta: 0:09:33  lr: 0.003993  min_lr: 0.003993  loss: 4.1620 (4.0610)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6012 (0.5975)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [27]  [1000/2502]  eta: 0:08:25  lr: 0.003993  min_lr: 0.003993  loss: 4.2202 (4.0833)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5421 (0.5949)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [27]  [1200/2502]  eta: 0:07:17  lr: 0.003993  min_lr: 0.003993  loss: 4.0068 (4.0763)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5416 (0.5914)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [27]  [1400/2502]  eta: 0:06:10  lr: 0.003993  min_lr: 0.003993  loss: 4.3195 (4.0721)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5372 (0.5894)  time: 0.3368  data: 0.0004  max mem: 27255
Epoch: [27]  [1600/2502]  eta: 0:05:02  lr: 0.003993  min_lr: 0.003993  loss: 3.7775 (4.0740)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6086 (0.5926)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [27]  [1800/2502]  eta: 0:03:55  lr: 0.003993  min_lr: 0.003993  loss: 4.3683 (4.0692)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5493 (inf)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [27]  [2000/2502]  eta: 0:02:48  lr: 0.003992  min_lr: 0.003992  loss: 3.7572 (4.0672)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5911 (inf)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [27]  [2200/2502]  eta: 0:01:41  lr: 0.003992  min_lr: 0.003992  loss: 4.0677 (4.0612)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5468 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [27]  [2400/2502]  eta: 0:00:34  lr: 0.003992  min_lr: 0.003992  loss: 4.0991 (4.0607)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5713 (inf)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [27]  [2501/2502]  eta: 0:00:00  lr: 0.003992  min_lr: 0.003992  loss: 3.9566 (4.0593)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5773 (inf)  time: 0.3004  data: 0.0009  max mem: 27255
Epoch: [27] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.003992  min_lr: 0.003992  loss: 3.9566 (4.0670)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5773 (inf)
Test:  [ 0/50]  eta: 0:02:11  loss: 0.9251 (0.9251)  acc1: 85.2000 (85.2000)  acc5: 97.6000 (97.6000)  time: 2.6218  data: 2.4290  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.2504 (1.2687)  acc1: 74.0000 (75.3818)  acc5: 93.6000 (93.6364)  time: 0.4462  data: 0.2796  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.2504 (1.2661)  acc1: 73.6000 (74.7619)  acc5: 93.6000 (93.8857)  time: 0.2500  data: 0.0855  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.4835 (1.4469)  acc1: 68.0000 (71.3032)  acc5: 89.2000 (91.1613)  time: 0.2399  data: 0.0743  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.8766 (1.5660)  acc1: 61.6000 (68.6146)  acc5: 84.8000 (89.4829)  time: 0.1981  data: 0.0331  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.8853 (1.5913)  acc1: 60.8000 (67.9120)  acc5: 84.4000 (89.0640)  time: 0.1759  data: 0.0121  max mem: 27255
Test: Total time: 0:00:13 (0.2632 s / it)
* Acc@1 68.010 Acc@5 88.922 loss 1.602
Accuracy of the model on the 50000 test images: 68.0%
Max accuracy: 68.01%
Epoch: [28]  [   0/2502]  eta: 1:27:59  lr: 0.003992  min_lr: 0.003992  loss: 4.9233 (4.9233)  weight_decay: 0.0500 (0.0500)  time: 2.1102  data: 1.7709  max mem: 27255
Epoch: [28]  [ 200/2502]  eta: 0:13:11  lr: 0.003992  min_lr: 0.003992  loss: 4.3457 (3.9906)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5832 (0.5886)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [28]  [ 400/2502]  eta: 0:11:52  lr: 0.003992  min_lr: 0.003992  loss: 4.2803 (4.0277)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6586 (0.6165)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [28]  [ 600/2502]  eta: 0:10:42  lr: 0.003991  min_lr: 0.003991  loss: 4.1585 (4.0370)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6098 (0.6128)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [28]  [ 800/2502]  eta: 0:09:33  lr: 0.003991  min_lr: 0.003991  loss: 3.9509 (4.0409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5763 (0.6043)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [28]  [1000/2502]  eta: 0:08:25  lr: 0.003991  min_lr: 0.003991  loss: 4.0651 (4.0448)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5611 (0.5957)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [28]  [1200/2502]  eta: 0:07:17  lr: 0.003991  min_lr: 0.003991  loss: 3.7067 (4.0419)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6162 (0.5980)  time: 0.3324  data: 0.0004  max mem: 27255
Epoch: [28]  [1400/2502]  eta: 0:06:09  lr: 0.003991  min_lr: 0.003991  loss: 4.0958 (4.0375)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5721 (0.5938)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [28]  [1600/2502]  eta: 0:05:02  lr: 0.003991  min_lr: 0.003991  loss: 4.0456 (4.0342)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5878 (0.5918)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [28]  [1800/2502]  eta: 0:03:55  lr: 0.003990  min_lr: 0.003990  loss: 3.8196 (4.0383)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5657 (0.5913)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [28]  [2000/2502]  eta: 0:02:48  lr: 0.003990  min_lr: 0.003990  loss: 4.3157 (4.0350)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5603 (0.5941)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [28]  [2200/2502]  eta: 0:01:41  lr: 0.003990  min_lr: 0.003990  loss: 3.9109 (4.0310)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5808 (0.5974)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [28]  [2400/2502]  eta: 0:00:34  lr: 0.003990  min_lr: 0.003990  loss: 4.3075 (4.0325)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5568 (0.5955)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [28]  [2501/2502]  eta: 0:00:00  lr: 0.003990  min_lr: 0.003990  loss: 3.8781 (4.0303)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5656 (0.5952)  time: 0.2998  data: 0.0007  max mem: 27255
Epoch: [28] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.003990  min_lr: 0.003990  loss: 3.8781 (4.0339)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5656 (0.5952)
Test:  [ 0/50]  eta: 0:02:37  loss: 1.0589 (1.0589)  acc1: 84.8000 (84.8000)  acc5: 96.8000 (96.8000)  time: 3.1451  data: 2.9535  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.2379 (1.2717)  acc1: 77.6000 (75.3818)  acc5: 93.6000 (93.2000)  time: 0.4522  data: 0.2857  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.2379 (1.2835)  acc1: 73.2000 (74.1143)  acc5: 93.6000 (93.2571)  time: 0.1997  data: 0.0358  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.5315 (1.4333)  acc1: 67.2000 (70.4258)  acc5: 88.0000 (90.6065)  time: 0.2266  data: 0.0628  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.8303 (1.5290)  acc1: 61.2000 (68.2341)  acc5: 83.6000 (89.2585)  time: 0.2082  data: 0.0445  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7829 (1.5499)  acc1: 62.4000 (67.8480)  acc5: 83.6000 (88.9840)  time: 0.2080  data: 0.0443  max mem: 27255
Test: Total time: 0:00:12 (0.2577 s / it)
* Acc@1 67.924 Acc@5 88.918 loss 1.547
Accuracy of the model on the 50000 test images: 67.9%
Max accuracy: 68.01%
Epoch: [29]  [   0/2502]  eta: 1:19:06  lr: 0.003990  min_lr: 0.003990  loss: 4.3762 (4.3762)  weight_decay: 0.0500 (0.0500)  time: 1.8972  data: 1.4763  max mem: 27255
Epoch: [29]  [ 200/2502]  eta: 0:13:07  lr: 0.003990  min_lr: 0.003990  loss: 4.1500 (3.9401)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5488 (0.5701)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [29]  [ 400/2502]  eta: 0:11:51  lr: 0.003989  min_lr: 0.003989  loss: 4.1213 (3.9878)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5699 (0.5749)  time: 0.3359  data: 0.0004  max mem: 27255
Epoch: [29]  [ 600/2502]  eta: 0:10:42  lr: 0.003989  min_lr: 0.003989  loss: 4.2568 (4.0162)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5631 (0.5734)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [29]  [ 800/2502]  eta: 0:09:33  lr: 0.003989  min_lr: 0.003989  loss: 3.8832 (4.0378)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6279 (0.5915)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [29]  [1000/2502]  eta: 0:08:25  lr: 0.003989  min_lr: 0.003989  loss: 4.2468 (4.0368)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5744 (0.5874)  time: 0.3416  data: 0.0004  max mem: 27255
Epoch: [29]  [1200/2502]  eta: 0:07:17  lr: 0.003989  min_lr: 0.003989  loss: 3.9155 (4.0263)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5714 (0.5897)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [29]  [1400/2502]  eta: 0:06:10  lr: 0.003989  min_lr: 0.003989  loss: 3.8618 (4.0206)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5823 (0.5935)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [29]  [1600/2502]  eta: 0:05:02  lr: 0.003988  min_lr: 0.003988  loss: 4.1279 (4.0210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5368 (0.5913)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [29]  [1800/2502]  eta: 0:03:55  lr: 0.003988  min_lr: 0.003988  loss: 4.0474 (4.0192)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5927 (0.5919)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [29]  [2000/2502]  eta: 0:02:48  lr: 0.003988  min_lr: 0.003988  loss: 3.8858 (4.0231)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5690 (0.5927)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [29]  [2200/2502]  eta: 0:01:41  lr: 0.003988  min_lr: 0.003988  loss: 4.1995 (4.0246)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6057 (0.5963)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [29]  [2400/2502]  eta: 0:00:34  lr: 0.003988  min_lr: 0.003988  loss: 4.2992 (4.0302)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5586 (0.5941)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [29]  [2501/2502]  eta: 0:00:00  lr: 0.003987  min_lr: 0.003987  loss: 4.2607 (4.0293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6029 (0.5945)  time: 0.3001  data: 0.0009  max mem: 27255
Epoch: [29] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003987  min_lr: 0.003987  loss: 4.2607 (4.0269)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6029 (0.5945)
Test:  [ 0/50]  eta: 0:02:28  loss: 0.9000 (0.9000)  acc1: 87.2000 (87.2000)  acc5: 95.6000 (95.6000)  time: 2.9778  data: 2.7726  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.1732 (1.2593)  acc1: 78.8000 (75.4182)  acc5: 93.6000 (93.2727)  time: 0.4486  data: 0.2807  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.2534 (1.2822)  acc1: 73.2000 (74.4762)  acc5: 93.2000 (93.3143)  time: 0.1983  data: 0.0344  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.5690 (1.4231)  acc1: 67.6000 (71.2903)  acc5: 89.6000 (91.3290)  time: 0.1917  data: 0.0275  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.7737 (1.5182)  acc1: 63.6000 (69.2000)  acc5: 86.4000 (89.9317)  time: 0.1731  data: 0.0090  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7552 (1.5512)  acc1: 63.2000 (68.6160)  acc5: 86.4000 (89.5120)  time: 0.1649  data: 0.0001  max mem: 27255
Test: Total time: 0:00:11 (0.2398 s / it)
* Acc@1 68.626 Acc@5 89.308 loss 1.558
Accuracy of the model on the 50000 test images: 68.6%
Max accuracy: 68.63%
Epoch: [30]  [   0/2502]  eta: 1:18:20  lr: 0.003987  min_lr: 0.003987  loss: 3.7106 (3.7106)  weight_decay: 0.0500 (0.0500)  time: 1.8789  data: 1.5244  max mem: 27255
Epoch: [30]  [ 200/2502]  eta: 0:13:12  lr: 0.003987  min_lr: 0.003987  loss: 4.3368 (3.9911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5660 (0.5954)  time: 0.3408  data: 0.0004  max mem: 27255
Epoch: [30]  [ 400/2502]  eta: 0:11:52  lr: 0.003987  min_lr: 0.003987  loss: 3.9882 (3.9946)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5363 (0.5893)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [30]  [ 600/2502]  eta: 0:10:41  lr: 0.003987  min_lr: 0.003987  loss: 4.3498 (4.0174)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6607 (0.5978)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [30]  [ 800/2502]  eta: 0:09:32  lr: 0.003987  min_lr: 0.003987  loss: 4.1557 (4.0361)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5586 (0.5971)  time: 0.3402  data: 0.0005  max mem: 27255
Epoch: [30]  [1000/2502]  eta: 0:08:24  lr: 0.003986  min_lr: 0.003986  loss: 3.8146 (4.0170)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6425 (0.6068)  time: 0.3329  data: 0.0005  max mem: 27255
Epoch: [30]  [1200/2502]  eta: 0:07:17  lr: 0.003986  min_lr: 0.003986  loss: 4.3151 (4.0178)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6117 (0.6084)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [30]  [1400/2502]  eta: 0:06:09  lr: 0.003986  min_lr: 0.003986  loss: 3.9647 (4.0079)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5884 (0.6118)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [30]  [1600/2502]  eta: 0:05:02  lr: 0.003986  min_lr: 0.003986  loss: 4.3783 (4.0110)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5456 (0.6109)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [30]  [1800/2502]  eta: 0:03:55  lr: 0.003986  min_lr: 0.003986  loss: 4.1945 (4.0108)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5835 (0.6088)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [30]  [2000/2502]  eta: 0:02:48  lr: 0.003985  min_lr: 0.003985  loss: 4.0200 (4.0108)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6038 (0.6119)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [30]  [2200/2502]  eta: 0:01:41  lr: 0.003985  min_lr: 0.003985  loss: 4.0189 (4.0179)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6069 (0.6133)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [30]  [2400/2502]  eta: 0:00:34  lr: 0.003985  min_lr: 0.003985  loss: 4.2850 (4.0228)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5473 (0.6119)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [30]  [2501/2502]  eta: 0:00:00  lr: 0.003985  min_lr: 0.003985  loss: 3.8097 (4.0223)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6379 (0.6129)  time: 0.3007  data: 0.0010  max mem: 27255
Epoch: [30] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.003985  min_lr: 0.003985  loss: 3.8097 (4.0039)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6379 (0.6129)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.9757 (0.9757)  acc1: 83.6000 (83.6000)  acc5: 97.2000 (97.2000)  time: 3.3151  data: 3.1275  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.2440 (1.2473)  acc1: 77.6000 (75.4909)  acc5: 94.4000 (94.0364)  time: 0.4559  data: 0.2898  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1786 (1.2238)  acc1: 74.8000 (75.3333)  acc5: 94.4000 (94.2667)  time: 0.2078  data: 0.0439  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.3991 (1.3988)  acc1: 69.2000 (72.1161)  acc5: 89.6000 (91.7419)  time: 0.2573  data: 0.0934  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 1.8367 (1.5089)  acc1: 64.4000 (69.8732)  acc5: 85.2000 (90.1463)  time: 0.2584  data: 0.0939  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.8144 (1.5395)  acc1: 63.2000 (69.0960)  acc5: 84.8000 (89.7680)  time: 0.2059  data: 0.0415  max mem: 27255
Test: Total time: 0:00:14 (0.2849 s / it)
* Acc@1 69.000 Acc@5 89.714 loss 1.542
Accuracy of the model on the 50000 test images: 69.0%
Max accuracy: 69.00%
Epoch: [31]  [   0/2502]  eta: 1:16:39  lr: 0.003985  min_lr: 0.003985  loss: 4.5316 (4.5316)  weight_decay: 0.0500 (0.0500)  time: 1.8382  data: 1.4845  max mem: 27255
Epoch: [31]  [ 200/2502]  eta: 0:13:08  lr: 0.003985  min_lr: 0.003985  loss: 3.5501 (3.9218)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5402 (0.5636)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [31]  [ 400/2502]  eta: 0:11:51  lr: 0.003984  min_lr: 0.003984  loss: 3.8253 (3.9446)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5753 (0.5804)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [31]  [ 600/2502]  eta: 0:10:40  lr: 0.003984  min_lr: 0.003984  loss: 4.2566 (3.9612)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5728 (0.5927)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [31]  [ 800/2502]  eta: 0:09:31  lr: 0.003984  min_lr: 0.003984  loss: 4.0782 (3.9732)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5845 (0.5970)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [31]  [1000/2502]  eta: 0:08:24  lr: 0.003984  min_lr: 0.003984  loss: 3.9357 (3.9679)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5898 (0.6084)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [31]  [1200/2502]  eta: 0:07:17  lr: 0.003983  min_lr: 0.003983  loss: 3.9243 (3.9789)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5949 (0.6076)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [31]  [1400/2502]  eta: 0:06:09  lr: 0.003983  min_lr: 0.003983  loss: 4.0008 (3.9751)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5810 (0.6062)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [31]  [1600/2502]  eta: 0:05:02  lr: 0.003983  min_lr: 0.003983  loss: 3.9071 (3.9754)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5661 (inf)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [31]  [1800/2502]  eta: 0:03:55  lr: 0.003983  min_lr: 0.003983  loss: 3.8764 (3.9831)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6078 (inf)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [31]  [2000/2502]  eta: 0:02:48  lr: 0.003983  min_lr: 0.003983  loss: 4.0827 (3.9911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5861 (inf)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [31]  [2200/2502]  eta: 0:01:41  lr: 0.003982  min_lr: 0.003982  loss: 3.9265 (3.9836)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5517 (inf)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [31]  [2400/2502]  eta: 0:00:34  lr: 0.003982  min_lr: 0.003982  loss: 4.0451 (3.9813)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5458 (inf)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [31]  [2501/2502]  eta: 0:00:00  lr: 0.003982  min_lr: 0.003982  loss: 4.2044 (3.9840)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5476 (inf)  time: 0.3004  data: 0.0009  max mem: 27255
Epoch: [31] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.003982  min_lr: 0.003982  loss: 4.2044 (3.9860)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5476 (inf)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.9850 (0.9850)  acc1: 83.2000 (83.2000)  acc5: 95.2000 (95.2000)  time: 2.6440  data: 2.4486  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.3284 (1.2970)  acc1: 76.0000 (74.9818)  acc5: 94.0000 (93.6000)  time: 0.4496  data: 0.2829  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.2452 (1.2812)  acc1: 74.8000 (74.8762)  acc5: 94.0000 (93.7524)  time: 0.2217  data: 0.0579  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.4803 (1.4313)  acc1: 69.6000 (72.1161)  acc5: 89.6000 (91.5742)  time: 0.2149  data: 0.0511  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.8007 (1.5293)  acc1: 62.8000 (69.9220)  acc5: 86.0000 (90.1268)  time: 0.1990  data: 0.0354  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7833 (1.5454)  acc1: 63.2000 (69.4160)  acc5: 86.0000 (89.9360)  time: 0.1769  data: 0.0134  max mem: 27255
Test: Total time: 0:00:12 (0.2546 s / it)
* Acc@1 69.388 Acc@5 89.910 loss 1.550
Accuracy of the model on the 50000 test images: 69.4%
Max accuracy: 69.39%
Epoch: [32]  [   0/2502]  eta: 1:35:09  lr: 0.003982  min_lr: 0.003982  loss: 3.7567 (3.7567)  weight_decay: 0.0500 (0.0500)  time: 2.2819  data: 1.9258  max mem: 27255
Epoch: [32]  [ 200/2502]  eta: 0:13:14  lr: 0.003982  min_lr: 0.003982  loss: 3.8835 (3.8887)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5810 (0.6028)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [32]  [ 400/2502]  eta: 0:11:54  lr: 0.003981  min_lr: 0.003981  loss: 4.0375 (3.9498)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5517 (0.6022)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [32]  [ 600/2502]  eta: 0:10:43  lr: 0.003981  min_lr: 0.003981  loss: 4.0978 (3.9512)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5673 (0.6095)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [32]  [ 800/2502]  eta: 0:09:33  lr: 0.003981  min_lr: 0.003981  loss: 3.9470 (3.9524)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6177 (0.6073)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [32]  [1000/2502]  eta: 0:08:25  lr: 0.003981  min_lr: 0.003981  loss: 4.1702 (3.9824)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5727 (0.6072)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [32]  [1200/2502]  eta: 0:07:17  lr: 0.003980  min_lr: 0.003980  loss: 4.0015 (3.9795)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5551 (0.6005)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [32]  [1400/2502]  eta: 0:06:09  lr: 0.003980  min_lr: 0.003980  loss: 4.1526 (3.9721)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5952 (0.6045)  time: 0.3423  data: 0.0004  max mem: 27255
Epoch: [32]  [1600/2502]  eta: 0:05:02  lr: 0.003980  min_lr: 0.003980  loss: 3.8608 (3.9684)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5537 (0.6021)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [32]  [1800/2502]  eta: 0:03:55  lr: 0.003980  min_lr: 0.003980  loss: 4.2757 (3.9718)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5508 (0.5990)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [32]  [2000/2502]  eta: 0:02:48  lr: 0.003979  min_lr: 0.003979  loss: 4.1319 (3.9786)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5724 (0.5987)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [32]  [2200/2502]  eta: 0:01:41  lr: 0.003979  min_lr: 0.003979  loss: 4.1206 (3.9831)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6202 (0.5991)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [32]  [2400/2502]  eta: 0:00:34  lr: 0.003979  min_lr: 0.003979  loss: 4.0127 (3.9793)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5250 (0.5985)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [32]  [2501/2502]  eta: 0:00:00  lr: 0.003979  min_lr: 0.003979  loss: 3.8800 (3.9819)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6422 (0.5999)  time: 0.2996  data: 0.0007  max mem: 27255
Epoch: [32] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.003979  min_lr: 0.003979  loss: 3.8800 (3.9810)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6422 (0.5999)
Test:  [ 0/50]  eta: 0:02:51  loss: 0.9086 (0.9086)  acc1: 87.2000 (87.2000)  acc5: 98.4000 (98.4000)  time: 3.4271  data: 3.2429  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.2766 (1.2699)  acc1: 77.2000 (77.0545)  acc5: 94.0000 (93.8909)  time: 0.4612  data: 0.2951  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.2850 (1.2868)  acc1: 74.4000 (75.4857)  acc5: 94.4000 (94.0381)  time: 0.1651  data: 0.0010  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.4016 (1.4242)  acc1: 68.8000 (72.3742)  acc5: 90.0000 (91.9355)  time: 0.1919  data: 0.0236  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.7759 (1.5152)  acc1: 63.2000 (70.0878)  acc5: 86.4000 (90.5171)  time: 0.2018  data: 0.0335  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7759 (1.5376)  acc1: 62.8000 (69.6160)  acc5: 86.8000 (90.4480)  time: 0.1754  data: 0.0114  max mem: 27255
Test: Total time: 0:00:12 (0.2475 s / it)
* Acc@1 69.780 Acc@5 90.242 loss 1.542
Accuracy of the model on the 50000 test images: 69.8%
Max accuracy: 69.78%
Epoch: [33]  [   0/2502]  eta: 1:12:41  lr: 0.003979  min_lr: 0.003979  loss: 3.8105 (3.8105)  weight_decay: 0.0500 (0.0500)  time: 1.7432  data: 1.3992  max mem: 27255
Epoch: [33]  [ 200/2502]  eta: 0:13:07  lr: 0.003979  min_lr: 0.003979  loss: 4.1604 (4.0331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5392 (0.6079)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [33]  [ 400/2502]  eta: 0:11:51  lr: 0.003978  min_lr: 0.003978  loss: 3.8623 (3.9975)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6215 (0.6140)  time: 0.3357  data: 0.0005  max mem: 27255
Epoch: [33]  [ 600/2502]  eta: 0:10:40  lr: 0.003978  min_lr: 0.003978  loss: 4.0494 (3.9777)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6482 (0.6197)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [33]  [ 800/2502]  eta: 0:09:32  lr: 0.003978  min_lr: 0.003978  loss: 3.8858 (3.9860)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6393 (0.6244)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [33]  [1000/2502]  eta: 0:08:24  lr: 0.003977  min_lr: 0.003977  loss: 3.9623 (3.9758)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5537 (0.6146)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [33]  [1200/2502]  eta: 0:07:17  lr: 0.003977  min_lr: 0.003977  loss: 4.1785 (3.9831)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5361 (0.6111)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [33]  [1400/2502]  eta: 0:06:09  lr: 0.003977  min_lr: 0.003977  loss: 4.0571 (3.9841)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5530 (0.6154)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [33]  [1600/2502]  eta: 0:05:02  lr: 0.003977  min_lr: 0.003977  loss: 4.3261 (3.9802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6542 (0.6172)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [33]  [1800/2502]  eta: 0:03:55  lr: 0.003976  min_lr: 0.003976  loss: 4.0914 (3.9778)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5783 (0.6128)  time: 0.3347  data: 0.0003  max mem: 27255
Epoch: [33]  [2000/2502]  eta: 0:02:48  lr: 0.003976  min_lr: 0.003976  loss: 4.1815 (3.9802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5589 (0.6123)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [33]  [2200/2502]  eta: 0:01:41  lr: 0.003976  min_lr: 0.003976  loss: 4.0911 (3.9809)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5758 (0.6130)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [33]  [2400/2502]  eta: 0:00:34  lr: 0.003976  min_lr: 0.003976  loss: 3.8507 (3.9812)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5962 (0.6108)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [33]  [2501/2502]  eta: 0:00:00  lr: 0.003975  min_lr: 0.003975  loss: 3.9698 (3.9832)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6008 (0.6111)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [33] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.003975  min_lr: 0.003975  loss: 3.9698 (3.9772)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6008 (0.6111)
Test:  [ 0/50]  eta: 0:02:09  loss: 1.0361 (1.0361)  acc1: 86.4000 (86.4000)  acc5: 95.2000 (95.2000)  time: 2.5902  data: 2.3773  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.2621 (1.2885)  acc1: 76.8000 (76.7273)  acc5: 94.4000 (93.4909)  time: 0.4377  data: 0.2694  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.3073 (1.3009)  acc1: 75.2000 (76.1905)  acc5: 94.4000 (93.7905)  time: 0.2256  data: 0.0618  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.5294 (1.4513)  acc1: 69.2000 (72.7742)  acc5: 90.4000 (91.6129)  time: 0.2336  data: 0.0696  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.8296 (1.5472)  acc1: 63.2000 (70.2439)  acc5: 86.0000 (90.2829)  time: 0.2068  data: 0.0430  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.8296 (1.5614)  acc1: 63.2000 (69.7600)  acc5: 86.0000 (90.1120)  time: 0.1695  data: 0.0059  max mem: 27255
Test: Total time: 0:00:12 (0.2563 s / it)
* Acc@1 69.826 Acc@5 90.066 loss 1.562
Accuracy of the model on the 50000 test images: 69.8%
Max accuracy: 69.83%
Epoch: [34]  [   0/2502]  eta: 1:15:02  lr: 0.003975  min_lr: 0.003975  loss: 4.3777 (4.3777)  weight_decay: 0.0500 (0.0500)  time: 1.7994  data: 1.4472  max mem: 27255
Epoch: [34]  [ 200/2502]  eta: 0:13:07  lr: 0.003975  min_lr: 0.003975  loss: 3.9665 (3.9441)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6034 (0.5816)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [34]  [ 400/2502]  eta: 0:11:52  lr: 0.003975  min_lr: 0.003975  loss: 4.0447 (3.9301)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5619 (0.5943)  time: 0.3340  data: 0.0003  max mem: 27255
Epoch: [34]  [ 600/2502]  eta: 0:10:41  lr: 0.003975  min_lr: 0.003975  loss: 4.1172 (3.9504)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6313 (0.5998)  time: 0.3414  data: 0.0004  max mem: 27255
Epoch: [34]  [ 800/2502]  eta: 0:09:33  lr: 0.003974  min_lr: 0.003974  loss: 3.8056 (3.9558)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5573 (0.5962)  time: 0.3407  data: 0.0006  max mem: 27255
Epoch: [34]  [1000/2502]  eta: 0:08:25  lr: 0.003974  min_lr: 0.003974  loss: 4.2188 (3.9559)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6039 (0.6076)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [34]  [1200/2502]  eta: 0:07:17  lr: 0.003974  min_lr: 0.003974  loss: 3.9450 (3.9452)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5606 (0.6089)  time: 0.3343  data: 0.0003  max mem: 27255
Epoch: [34]  [1400/2502]  eta: 0:06:10  lr: 0.003973  min_lr: 0.003973  loss: 3.9690 (3.9400)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5397 (0.6062)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [34]  [1600/2502]  eta: 0:05:02  lr: 0.003973  min_lr: 0.003973  loss: 3.8897 (3.9367)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5293 (0.6081)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [34]  [1800/2502]  eta: 0:03:55  lr: 0.003973  min_lr: 0.003973  loss: 4.1146 (3.9307)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5643 (0.6134)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [34]  [2000/2502]  eta: 0:02:48  lr: 0.003972  min_lr: 0.003972  loss: 4.0648 (3.9373)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5825 (0.6138)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [34]  [2200/2502]  eta: 0:01:41  lr: 0.003972  min_lr: 0.003972  loss: 4.2273 (3.9354)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6033 (0.6112)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [34]  [2400/2502]  eta: 0:00:34  lr: 0.003972  min_lr: 0.003972  loss: 3.9338 (3.9399)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5662 (inf)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [34]  [2501/2502]  eta: 0:00:00  lr: 0.003972  min_lr: 0.003972  loss: 4.2370 (3.9402)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5533 (inf)  time: 0.3012  data: 0.0012  max mem: 27255
Epoch: [34] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003972  min_lr: 0.003972  loss: 4.2370 (3.9526)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5533 (inf)
Test:  [ 0/50]  eta: 0:02:10  loss: 1.0687 (1.0687)  acc1: 87.2000 (87.2000)  acc5: 96.8000 (96.8000)  time: 2.6001  data: 2.4169  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.2759 (1.2592)  acc1: 78.0000 (77.2000)  acc5: 95.2000 (94.2545)  time: 0.4044  data: 0.2387  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.2759 (1.2729)  acc1: 74.4000 (76.3810)  acc5: 95.2000 (94.4381)  time: 0.2311  data: 0.0659  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.4417 (1.4356)  acc1: 69.6000 (72.9806)  acc5: 89.6000 (92.1419)  time: 0.2681  data: 0.1024  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.7849 (1.5346)  acc1: 64.4000 (70.7122)  acc5: 86.8000 (90.5073)  time: 0.2292  data: 0.0642  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7849 (1.5663)  acc1: 63.2000 (70.0320)  acc5: 86.8000 (90.1760)  time: 0.1816  data: 0.0174  max mem: 27255
Test: Total time: 0:00:13 (0.2677 s / it)
* Acc@1 70.212 Acc@5 90.306 loss 1.564
Accuracy of the model on the 50000 test images: 70.2%
Max accuracy: 70.21%
Epoch: [35]  [   0/2502]  eta: 1:23:35  lr: 0.003972  min_lr: 0.003972  loss: 3.9484 (3.9484)  weight_decay: 0.0500 (0.0500)  time: 2.0045  data: 1.6504  max mem: 27255
Epoch: [35]  [ 200/2502]  eta: 0:13:11  lr: 0.003971  min_lr: 0.003971  loss: 4.0452 (3.9042)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6484 (0.6093)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [35]  [ 400/2502]  eta: 0:11:53  lr: 0.003971  min_lr: 0.003971  loss: 4.1091 (3.9195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5264 (0.6073)  time: 0.3355  data: 0.0005  max mem: 27255
Epoch: [35]  [ 600/2502]  eta: 0:10:42  lr: 0.003971  min_lr: 0.003971  loss: 3.8361 (3.9083)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5860 (0.6069)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [35]  [ 800/2502]  eta: 0:09:33  lr: 0.003971  min_lr: 0.003971  loss: 4.0132 (3.9123)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6345 (0.6109)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [35]  [1000/2502]  eta: 0:08:25  lr: 0.003970  min_lr: 0.003970  loss: 3.9953 (3.9279)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5851 (0.6092)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [35]  [1200/2502]  eta: 0:07:17  lr: 0.003970  min_lr: 0.003970  loss: 4.2425 (3.9411)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5706 (0.6104)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [35]  [1400/2502]  eta: 0:06:10  lr: 0.003970  min_lr: 0.003970  loss: 3.8891 (3.9345)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6287 (0.6135)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [35]  [1600/2502]  eta: 0:05:02  lr: 0.003969  min_lr: 0.003969  loss: 3.5864 (3.9287)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6330 (0.6117)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [35]  [1800/2502]  eta: 0:03:55  lr: 0.003969  min_lr: 0.003969  loss: 4.0588 (3.9260)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6132 (0.6141)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [35]  [2000/2502]  eta: 0:02:48  lr: 0.003969  min_lr: 0.003969  loss: 3.8819 (3.9294)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5434 (0.6117)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [35]  [2200/2502]  eta: 0:01:41  lr: 0.003968  min_lr: 0.003968  loss: 4.0037 (3.9321)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6014 (0.6100)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [35]  [2400/2502]  eta: 0:00:34  lr: 0.003968  min_lr: 0.003968  loss: 3.9061 (3.9348)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5441 (0.6092)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [35]  [2501/2502]  eta: 0:00:00  lr: 0.003968  min_lr: 0.003968  loss: 3.4641 (3.9346)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6136 (0.6093)  time: 0.2998  data: 0.0008  max mem: 27255
Epoch: [35] Total time: 0:13:59 (0.3353 s / it)
Averaged stats: lr: 0.003968  min_lr: 0.003968  loss: 3.4641 (3.9381)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6136 (0.6093)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.9530 (0.9530)  acc1: 87.2000 (87.2000)  acc5: 96.0000 (96.0000)  time: 3.0561  data: 2.8555  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.1286 (1.1587)  acc1: 79.6000 (77.4182)  acc5: 94.0000 (94.2182)  time: 0.4489  data: 0.2818  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1546 (1.1825)  acc1: 75.2000 (76.1905)  acc5: 94.0000 (94.4000)  time: 0.2050  data: 0.0413  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.3203 (1.3313)  acc1: 71.2000 (73.1484)  acc5: 90.8000 (91.8710)  time: 0.2293  data: 0.0655  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.7194 (1.4236)  acc1: 64.4000 (70.6732)  acc5: 86.0000 (90.5366)  time: 0.2105  data: 0.0462  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7091 (1.4571)  acc1: 62.8000 (69.9760)  acc5: 86.4000 (90.3280)  time: 0.1752  data: 0.0098  max mem: 27255
Test: Total time: 0:00:12 (0.2597 s / it)
* Acc@1 70.112 Acc@5 90.300 loss 1.459
Accuracy of the model on the 50000 test images: 70.1%
Max accuracy: 70.21%
Epoch: [36]  [   0/2502]  eta: 1:27:52  lr: 0.003968  min_lr: 0.003968  loss: 3.7570 (3.7570)  weight_decay: 0.0500 (0.0500)  time: 2.1074  data: 1.7568  max mem: 27255
Epoch: [36]  [ 200/2502]  eta: 0:13:10  lr: 0.003968  min_lr: 0.003968  loss: 4.0672 (3.8793)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5488 (0.6356)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [36]  [ 400/2502]  eta: 0:11:52  lr: 0.003967  min_lr: 0.003967  loss: 3.9546 (3.8708)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5737 (0.6240)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [36]  [ 600/2502]  eta: 0:10:41  lr: 0.003967  min_lr: 0.003967  loss: 3.8751 (3.8914)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5552 (0.6210)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [36]  [ 800/2502]  eta: 0:09:32  lr: 0.003967  min_lr: 0.003967  loss: 3.7270 (3.8915)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6163 (0.6129)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [36]  [1000/2502]  eta: 0:08:24  lr: 0.003966  min_lr: 0.003966  loss: 4.0277 (3.9049)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6044 (0.6181)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [36]  [1200/2502]  eta: 0:07:17  lr: 0.003966  min_lr: 0.003966  loss: 3.6299 (3.9084)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6266 (0.6140)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [36]  [1400/2502]  eta: 0:06:10  lr: 0.003966  min_lr: 0.003966  loss: 3.7694 (3.9016)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6388 (0.6150)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [36]  [1600/2502]  eta: 0:05:02  lr: 0.003965  min_lr: 0.003965  loss: 4.2172 (3.9073)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6304 (0.6153)  time: 0.3325  data: 0.0004  max mem: 27255
Epoch: [36]  [1800/2502]  eta: 0:03:55  lr: 0.003965  min_lr: 0.003965  loss: 4.1176 (3.9032)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6172 (0.6230)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [36]  [2000/2502]  eta: 0:02:48  lr: 0.003965  min_lr: 0.003965  loss: 4.0043 (3.8989)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6624 (0.6223)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [36]  [2200/2502]  eta: 0:01:41  lr: 0.003964  min_lr: 0.003964  loss: 4.1008 (3.8988)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5385 (0.6218)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [36]  [2400/2502]  eta: 0:00:34  lr: 0.003964  min_lr: 0.003964  loss: 3.8646 (3.8998)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6098 (0.6255)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [36]  [2501/2502]  eta: 0:00:00  lr: 0.003964  min_lr: 0.003964  loss: 3.9123 (3.8972)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6191 (0.6264)  time: 0.3003  data: 0.0008  max mem: 27255
Epoch: [36] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003964  min_lr: 0.003964  loss: 3.9123 (3.9197)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6191 (0.6264)
Test:  [ 0/50]  eta: 0:02:23  loss: 0.7570 (0.7570)  acc1: 87.2000 (87.2000)  acc5: 98.0000 (98.0000)  time: 2.8795  data: 2.6761  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.0812 (1.1391)  acc1: 79.6000 (77.7455)  acc5: 95.2000 (95.2000)  time: 0.4660  data: 0.2984  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1844 (1.1811)  acc1: 75.2000 (76.4762)  acc5: 94.8000 (94.8191)  time: 0.2197  data: 0.0557  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.4533 (1.3439)  acc1: 71.6000 (73.4452)  acc5: 90.8000 (92.2581)  time: 0.2138  data: 0.0494  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.7393 (1.4344)  acc1: 65.6000 (71.2390)  acc5: 86.0000 (90.9756)  time: 0.1975  data: 0.0331  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7038 (1.4616)  acc1: 65.2000 (70.6000)  acc5: 87.6000 (90.7600)  time: 0.1967  data: 0.0330  max mem: 27255
Test: Total time: 0:00:12 (0.2559 s / it)
* Acc@1 70.632 Acc@5 90.528 loss 1.466
Accuracy of the model on the 50000 test images: 70.6%
Max accuracy: 70.63%
Epoch: [37]  [   0/2502]  eta: 1:38:21  lr: 0.003964  min_lr: 0.003964  loss: 4.2813 (4.2813)  weight_decay: 0.0500 (0.0500)  time: 2.3587  data: 2.0151  max mem: 27255
Epoch: [37]  [ 200/2502]  eta: 0:13:14  lr: 0.003963  min_lr: 0.003963  loss: 3.7809 (3.8807)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5762 (0.6158)  time: 0.3419  data: 0.0004  max mem: 27255
Epoch: [37]  [ 400/2502]  eta: 0:11:55  lr: 0.003963  min_lr: 0.003963  loss: 4.2572 (3.8733)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6295 (0.6222)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [37]  [ 600/2502]  eta: 0:10:44  lr: 0.003963  min_lr: 0.003963  loss: 4.1227 (3.8979)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5910 (0.6164)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [37]  [ 800/2502]  eta: 0:09:34  lr: 0.003962  min_lr: 0.003962  loss: 4.0505 (3.9026)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6313 (0.6191)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [37]  [1000/2502]  eta: 0:08:26  lr: 0.003962  min_lr: 0.003962  loss: 4.2140 (3.9067)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6509 (0.6280)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [37]  [1200/2502]  eta: 0:07:18  lr: 0.003962  min_lr: 0.003962  loss: 4.0240 (3.9067)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5457 (0.6194)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [37]  [1400/2502]  eta: 0:06:10  lr: 0.003961  min_lr: 0.003961  loss: 3.7181 (3.9090)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5649 (0.6158)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [37]  [1600/2502]  eta: 0:05:03  lr: 0.003961  min_lr: 0.003961  loss: 3.8009 (3.9151)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5389 (0.6100)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [37]  [1800/2502]  eta: 0:03:55  lr: 0.003961  min_lr: 0.003961  loss: 4.1826 (3.9202)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6033 (0.6123)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [37]  [2000/2502]  eta: 0:02:48  lr: 0.003960  min_lr: 0.003960  loss: 3.8774 (3.9246)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6510 (0.6133)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [37]  [2200/2502]  eta: 0:01:41  lr: 0.003960  min_lr: 0.003960  loss: 4.0937 (3.9245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5827 (0.6147)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [37]  [2400/2502]  eta: 0:00:34  lr: 0.003960  min_lr: 0.003960  loss: 3.9408 (3.9281)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5557 (0.6125)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [37]  [2501/2502]  eta: 0:00:00  lr: 0.003959  min_lr: 0.003959  loss: 4.0083 (3.9252)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5693 (0.6137)  time: 0.3000  data: 0.0007  max mem: 27255
Epoch: [37] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.003959  min_lr: 0.003959  loss: 4.0083 (3.9193)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5693 (0.6137)
Test:  [ 0/50]  eta: 0:02:13  loss: 1.0053 (1.0053)  acc1: 87.2000 (87.2000)  acc5: 96.8000 (96.8000)  time: 2.6607  data: 2.4557  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.1926 (1.2509)  acc1: 79.6000 (77.6364)  acc5: 94.8000 (94.4000)  time: 0.4521  data: 0.2797  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1926 (1.2513)  acc1: 74.8000 (76.5524)  acc5: 94.8000 (94.5524)  time: 0.1993  data: 0.0313  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.5160 (1.3945)  acc1: 69.6000 (73.4581)  acc5: 91.6000 (92.4258)  time: 0.1765  data: 0.0112  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.7797 (1.4937)  acc1: 65.2000 (70.9854)  acc5: 86.4000 (90.9561)  time: 0.1902  data: 0.0266  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7968 (1.5250)  acc1: 63.6000 (70.2400)  acc5: 86.4000 (90.4560)  time: 0.1878  data: 0.0243  max mem: 27255
Test: Total time: 0:00:12 (0.2439 s / it)
* Acc@1 70.312 Acc@5 90.460 loss 1.528
Accuracy of the model on the 50000 test images: 70.3%
Max accuracy: 70.63%
Epoch: [38]  [   0/2502]  eta: 1:35:03  lr: 0.003959  min_lr: 0.003959  loss: 4.0300 (4.0300)  weight_decay: 0.0500 (0.0500)  time: 2.2798  data: 1.6510  max mem: 27255
Epoch: [38]  [ 200/2502]  eta: 0:13:13  lr: 0.003959  min_lr: 0.003959  loss: 3.9995 (3.9331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5676 (0.6191)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [38]  [ 400/2502]  eta: 0:11:53  lr: 0.003959  min_lr: 0.003959  loss: 4.0689 (3.8982)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5497 (0.6154)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [38]  [ 600/2502]  eta: 0:10:41  lr: 0.003958  min_lr: 0.003958  loss: 4.0735 (3.8883)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5920 (inf)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [38]  [ 800/2502]  eta: 0:09:32  lr: 0.003958  min_lr: 0.003958  loss: 4.0257 (3.9048)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5917 (inf)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [38]  [1000/2502]  eta: 0:08:25  lr: 0.003958  min_lr: 0.003958  loss: 3.7478 (3.9000)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5827 (inf)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [38]  [1200/2502]  eta: 0:07:17  lr: 0.003957  min_lr: 0.003957  loss: 3.9966 (3.9032)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5935 (inf)  time: 0.3362  data: 0.0004  max mem: 27255
Epoch: [38]  [1400/2502]  eta: 0:06:10  lr: 0.003957  min_lr: 0.003957  loss: 4.1149 (3.8927)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5701 (inf)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [38]  [1600/2502]  eta: 0:05:02  lr: 0.003956  min_lr: 0.003956  loss: 4.0751 (3.9047)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6109 (inf)  time: 0.3366  data: 0.0005  max mem: 27255
Epoch: [38]  [1800/2502]  eta: 0:03:55  lr: 0.003956  min_lr: 0.003956  loss: 4.0988 (3.9111)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5644 (inf)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [38]  [2000/2502]  eta: 0:02:48  lr: 0.003956  min_lr: 0.003956  loss: 3.7004 (3.9080)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5632 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [38]  [2200/2502]  eta: 0:01:41  lr: 0.003955  min_lr: 0.003955  loss: 3.9496 (3.9096)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6229 (inf)  time: 0.3427  data: 0.0004  max mem: 27255
Epoch: [38]  [2400/2502]  eta: 0:00:34  lr: 0.003955  min_lr: 0.003955  loss: 4.2550 (3.9130)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5847 (inf)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [38]  [2501/2502]  eta: 0:00:00  lr: 0.003955  min_lr: 0.003955  loss: 3.6653 (3.9077)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5930 (inf)  time: 0.3020  data: 0.0010  max mem: 27255
Epoch: [38] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.003955  min_lr: 0.003955  loss: 3.6653 (3.9079)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5930 (inf)
Test:  [ 0/50]  eta: 0:02:08  loss: 0.7613 (0.7613)  acc1: 88.8000 (88.8000)  acc5: 96.4000 (96.4000)  time: 2.5774  data: 2.3795  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.0805 (1.0798)  acc1: 76.4000 (77.5636)  acc5: 94.8000 (94.1818)  time: 0.4049  data: 0.2380  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0512 (1.0887)  acc1: 76.4000 (76.6286)  acc5: 94.0000 (94.4571)  time: 0.1973  data: 0.0334  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3300 (1.2492)  acc1: 70.4000 (73.2645)  acc5: 90.4000 (92.0774)  time: 0.2029  data: 0.0391  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6607 (1.3454)  acc1: 64.4000 (71.1707)  acc5: 86.4000 (90.7317)  time: 0.1853  data: 0.0214  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6050 (1.3693)  acc1: 64.4000 (70.4240)  acc5: 86.4000 (90.4880)  time: 0.1691  data: 0.0054  max mem: 27255
Test: Total time: 0:00:11 (0.2364 s / it)
* Acc@1 70.742 Acc@5 90.710 loss 1.366
Accuracy of the model on the 50000 test images: 70.7%
Max accuracy: 70.74%
Epoch: [39]  [   0/2502]  eta: 1:37:22  lr: 0.003955  min_lr: 0.003955  loss: 3.6437 (3.6437)  weight_decay: 0.0500 (0.0500)  time: 2.3352  data: 1.9949  max mem: 27255
Epoch: [39]  [ 200/2502]  eta: 0:13:14  lr: 0.003954  min_lr: 0.003954  loss: 4.1273 (3.9181)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5709 (0.5933)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [39]  [ 400/2502]  eta: 0:11:53  lr: 0.003954  min_lr: 0.003954  loss: 3.6563 (3.8702)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5492 (0.5978)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [39]  [ 600/2502]  eta: 0:10:42  lr: 0.003954  min_lr: 0.003954  loss: 3.7391 (3.8769)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6251 (0.6125)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [39]  [ 800/2502]  eta: 0:09:33  lr: 0.003953  min_lr: 0.003953  loss: 4.2102 (3.8771)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6232 (0.6168)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [39]  [1000/2502]  eta: 0:08:25  lr: 0.003953  min_lr: 0.003953  loss: 4.0636 (3.8911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5890 (0.6221)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [39]  [1200/2502]  eta: 0:07:17  lr: 0.003952  min_lr: 0.003952  loss: 4.1522 (3.9016)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5785 (0.6196)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [39]  [1400/2502]  eta: 0:06:10  lr: 0.003952  min_lr: 0.003952  loss: 3.7606 (3.8966)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6142 (0.6190)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [39]  [1600/2502]  eta: 0:05:02  lr: 0.003952  min_lr: 0.003952  loss: 4.1071 (3.8980)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5737 (0.6182)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [39]  [1800/2502]  eta: 0:03:55  lr: 0.003951  min_lr: 0.003951  loss: 3.8850 (3.9024)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5359 (0.6146)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [39]  [2000/2502]  eta: 0:02:48  lr: 0.003951  min_lr: 0.003951  loss: 4.0515 (3.9088)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5999 (0.6183)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [39]  [2200/2502]  eta: 0:01:41  lr: 0.003950  min_lr: 0.003950  loss: 3.9528 (3.9065)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5573 (0.6167)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [39]  [2400/2502]  eta: 0:00:34  lr: 0.003950  min_lr: 0.003950  loss: 4.0195 (3.9021)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5270 (0.6162)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [39]  [2501/2502]  eta: 0:00:00  lr: 0.003950  min_lr: 0.003950  loss: 3.9539 (3.9040)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6280 (0.6154)  time: 0.3000  data: 0.0008  max mem: 27255
Epoch: [39] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.003950  min_lr: 0.003950  loss: 3.9539 (3.8926)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6280 (0.6154)
Test:  [ 0/50]  eta: 0:02:25  loss: 0.8721 (0.8721)  acc1: 89.2000 (89.2000)  acc5: 98.4000 (98.4000)  time: 2.9082  data: 2.7123  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.1653 (1.1828)  acc1: 78.4000 (78.7273)  acc5: 94.0000 (94.5455)  time: 0.4173  data: 0.2502  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.2014 (1.2031)  acc1: 77.2000 (77.4286)  acc5: 95.2000 (94.7619)  time: 0.2075  data: 0.0436  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.4563 (1.3707)  acc1: 70.0000 (73.8452)  acc5: 91.2000 (92.5161)  time: 0.2446  data: 0.0809  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.8170 (1.4775)  acc1: 64.0000 (71.5512)  acc5: 86.0000 (91.0829)  time: 0.2162  data: 0.0526  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.8000 (1.5069)  acc1: 66.0000 (70.9840)  acc5: 86.8000 (90.8480)  time: 0.1774  data: 0.0139  max mem: 27255
Test: Total time: 0:00:12 (0.2590 s / it)
* Acc@1 70.986 Acc@5 90.742 loss 1.511
Accuracy of the model on the 50000 test images: 71.0%
Max accuracy: 70.99%
Epoch: [40]  [   0/2502]  eta: 1:22:28  lr: 0.003950  min_lr: 0.003950  loss: 4.6959 (4.6959)  weight_decay: 0.0500 (0.0500)  time: 1.9777  data: 1.6373  max mem: 27255
Epoch: [40]  [ 200/2502]  eta: 0:13:12  lr: 0.003949  min_lr: 0.003949  loss: 4.0205 (3.8443)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5970 (0.6163)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [40]  [ 400/2502]  eta: 0:11:52  lr: 0.003949  min_lr: 0.003949  loss: 4.0732 (3.8760)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5717 (0.6183)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [40]  [ 600/2502]  eta: 0:10:42  lr: 0.003949  min_lr: 0.003949  loss: 3.9019 (3.8601)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5418 (0.6194)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [40]  [ 800/2502]  eta: 0:09:33  lr: 0.003948  min_lr: 0.003948  loss: 4.1254 (3.8669)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6557 (0.6257)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [40]  [1000/2502]  eta: 0:08:25  lr: 0.003948  min_lr: 0.003948  loss: 3.9867 (3.8596)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5859 (0.6243)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [40]  [1200/2502]  eta: 0:07:17  lr: 0.003947  min_lr: 0.003947  loss: 3.6706 (3.8574)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5677 (0.6186)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [40]  [1400/2502]  eta: 0:06:10  lr: 0.003947  min_lr: 0.003947  loss: 4.0518 (3.8666)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6340 (0.6221)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [40]  [1600/2502]  eta: 0:05:03  lr: 0.003947  min_lr: 0.003947  loss: 4.0516 (3.8720)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5918 (0.6214)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [40]  [1800/2502]  eta: 0:03:55  lr: 0.003946  min_lr: 0.003946  loss: 4.1107 (3.8752)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6178 (0.6196)  time: 0.3361  data: 0.0004  max mem: 27255
Epoch: [40]  [2000/2502]  eta: 0:02:48  lr: 0.003946  min_lr: 0.003946  loss: 3.9878 (3.8797)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5916 (0.6203)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [40]  [2200/2502]  eta: 0:01:41  lr: 0.003945  min_lr: 0.003945  loss: 4.0608 (3.8820)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5751 (0.6196)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [40]  [2400/2502]  eta: 0:00:34  lr: 0.003945  min_lr: 0.003945  loss: 3.4702 (3.8788)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6985 (0.6267)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [40]  [2501/2502]  eta: 0:00:00  lr: 0.003945  min_lr: 0.003945  loss: 4.0231 (3.8762)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5874 (0.6250)  time: 0.3019  data: 0.0009  max mem: 27255
Epoch: [40] Total time: 0:13:59 (0.3357 s / it)
Averaged stats: lr: 0.003945  min_lr: 0.003945  loss: 4.0231 (3.8754)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5874 (0.6250)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.9191 (0.9191)  acc1: 88.8000 (88.8000)  acc5: 97.6000 (97.6000)  time: 2.6529  data: 2.4509  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.1788 (1.1512)  acc1: 79.2000 (78.7273)  acc5: 96.0000 (95.2000)  time: 0.4402  data: 0.2730  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1965 (1.1634)  acc1: 76.0000 (77.9619)  acc5: 95.6000 (95.0095)  time: 0.2080  data: 0.0442  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3422 (1.3230)  acc1: 72.4000 (75.0194)  acc5: 91.2000 (92.8258)  time: 0.2207  data: 0.0569  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.7283 (1.4289)  acc1: 66.4000 (72.3220)  acc5: 86.4000 (91.3366)  time: 0.2247  data: 0.0610  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7285 (1.4551)  acc1: 65.2000 (71.6560)  acc5: 86.8000 (91.2320)  time: 0.1870  data: 0.0234  max mem: 27255
Test: Total time: 0:00:12 (0.2584 s / it)
* Acc@1 71.730 Acc@5 91.300 loss 1.454
Accuracy of the model on the 50000 test images: 71.7%
Max accuracy: 71.73%
Epoch: [41]  [   0/2502]  eta: 1:12:15  lr: 0.003945  min_lr: 0.003945  loss: 3.9396 (3.9396)  weight_decay: 0.0500 (0.0500)  time: 1.7328  data: 1.3936  max mem: 27255
Epoch: [41]  [ 200/2502]  eta: 0:13:05  lr: 0.003944  min_lr: 0.003944  loss: 4.0887 (3.8961)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6062 (0.6318)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [41]  [ 400/2502]  eta: 0:11:49  lr: 0.003944  min_lr: 0.003944  loss: 3.8949 (3.8987)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5960 (0.6310)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [41]  [ 600/2502]  eta: 0:10:39  lr: 0.003943  min_lr: 0.003943  loss: 3.9959 (3.8855)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5608 (0.6206)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [41]  [ 800/2502]  eta: 0:09:31  lr: 0.003943  min_lr: 0.003943  loss: 4.1071 (3.8829)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6758 (0.6343)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [41]  [1000/2502]  eta: 0:08:23  lr: 0.003943  min_lr: 0.003943  loss: 3.9623 (3.8856)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6134 (0.6321)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [41]  [1200/2502]  eta: 0:07:16  lr: 0.003942  min_lr: 0.003942  loss: 3.9370 (3.8811)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6006 (0.6297)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [41]  [1400/2502]  eta: 0:06:09  lr: 0.003942  min_lr: 0.003942  loss: 3.9142 (3.8761)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6211 (0.6336)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [41]  [1600/2502]  eta: 0:05:02  lr: 0.003941  min_lr: 0.003941  loss: 3.5338 (3.8802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5885 (0.6286)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [41]  [1800/2502]  eta: 0:03:55  lr: 0.003941  min_lr: 0.003941  loss: 3.5963 (3.8749)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5608 (0.6239)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [41]  [2000/2502]  eta: 0:02:48  lr: 0.003940  min_lr: 0.003940  loss: 4.0731 (3.8769)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5506 (0.6232)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [41]  [2200/2502]  eta: 0:01:41  lr: 0.003940  min_lr: 0.003940  loss: 4.1944 (3.8707)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6377 (0.6236)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [41]  [2400/2502]  eta: 0:00:34  lr: 0.003940  min_lr: 0.003940  loss: 4.0750 (3.8713)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5783 (0.6236)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [41]  [2501/2502]  eta: 0:00:00  lr: 0.003939  min_lr: 0.003939  loss: 4.0032 (3.8715)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5727 (0.6225)  time: 0.3003  data: 0.0009  max mem: 27255
Epoch: [41] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003939  min_lr: 0.003939  loss: 4.0032 (3.8752)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5727 (0.6225)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.8627 (0.8627)  acc1: 87.2000 (87.2000)  acc5: 96.4000 (96.4000)  time: 2.6512  data: 2.4463  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.1978 (1.2269)  acc1: 77.6000 (78.4364)  acc5: 95.6000 (95.1273)  time: 0.4317  data: 0.2642  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1864 (1.2189)  acc1: 77.2000 (77.7333)  acc5: 95.6000 (95.3333)  time: 0.2000  data: 0.0355  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3534 (1.3848)  acc1: 72.0000 (74.4387)  acc5: 90.4000 (92.9032)  time: 0.1973  data: 0.0328  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.7772 (1.4826)  acc1: 65.2000 (72.1366)  acc5: 86.0000 (91.3561)  time: 0.1922  data: 0.0286  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7608 (1.5032)  acc1: 65.6000 (71.6000)  acc5: 86.0000 (91.1440)  time: 0.1734  data: 0.0099  max mem: 27255
Test: Total time: 0:00:12 (0.2417 s / it)
* Acc@1 71.544 Acc@5 91.144 loss 1.504
Accuracy of the model on the 50000 test images: 71.5%
Max accuracy: 71.73%
Epoch: [42]  [   0/2502]  eta: 1:29:56  lr: 0.003939  min_lr: 0.003939  loss: 3.9426 (3.9426)  weight_decay: 0.0500 (0.0500)  time: 2.1568  data: 1.4926  max mem: 27255
Epoch: [42]  [ 200/2502]  eta: 0:13:13  lr: 0.003939  min_lr: 0.003939  loss: 3.6905 (3.8520)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6308 (0.6454)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [42]  [ 400/2502]  eta: 0:11:54  lr: 0.003939  min_lr: 0.003939  loss: 4.1231 (3.8569)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5560 (0.6243)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [42]  [ 600/2502]  eta: 0:10:42  lr: 0.003938  min_lr: 0.003938  loss: 3.8313 (3.8683)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6312 (0.6292)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [42]  [ 800/2502]  eta: 0:09:33  lr: 0.003938  min_lr: 0.003938  loss: 3.8261 (3.8680)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5907 (0.6321)  time: 0.3364  data: 0.0004  max mem: 27255
Epoch: [42]  [1000/2502]  eta: 0:08:25  lr: 0.003937  min_lr: 0.003937  loss: 4.1557 (3.8763)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6296 (0.6362)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [42]  [1200/2502]  eta: 0:07:17  lr: 0.003937  min_lr: 0.003937  loss: 3.8528 (3.8750)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5952 (0.6369)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [42]  [1400/2502]  eta: 0:06:10  lr: 0.003936  min_lr: 0.003936  loss: 3.7640 (3.8755)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.3416  data: 0.0004  max mem: 27255
Epoch: [42]  [1600/2502]  eta: 0:05:02  lr: 0.003936  min_lr: 0.003936  loss: 4.0448 (3.8711)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6561 (nan)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [42]  [1800/2502]  eta: 0:03:55  lr: 0.003935  min_lr: 0.003935  loss: 3.9370 (3.8720)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6408 (nan)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [42]  [2000/2502]  eta: 0:02:48  lr: 0.003935  min_lr: 0.003935  loss: 3.8101 (3.8723)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6211 (nan)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [42]  [2200/2502]  eta: 0:01:41  lr: 0.003934  min_lr: 0.003934  loss: 3.7627 (3.8657)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5776 (nan)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [42]  [2400/2502]  eta: 0:00:34  lr: 0.003934  min_lr: 0.003934  loss: 4.2118 (3.8670)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5806 (nan)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [42]  [2501/2502]  eta: 0:00:00  lr: 0.003934  min_lr: 0.003934  loss: 4.0405 (3.8668)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5622 (nan)  time: 0.3007  data: 0.0009  max mem: 27255
Epoch: [42] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.003934  min_lr: 0.003934  loss: 4.0405 (3.8701)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5622 (nan)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.7154 (0.7154)  acc1: 87.6000 (87.6000)  acc5: 98.0000 (98.0000)  time: 2.6573  data: 2.4568  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.0414 (1.0838)  acc1: 79.2000 (78.9091)  acc5: 95.2000 (95.0546)  time: 0.4104  data: 0.2363  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0924 (1.1191)  acc1: 77.6000 (77.7333)  acc5: 95.2000 (94.8762)  time: 0.1929  data: 0.0220  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.4518 (1.2781)  acc1: 72.0000 (74.8258)  acc5: 90.0000 (92.8516)  time: 0.2053  data: 0.0383  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.7211 (1.3780)  acc1: 66.0000 (72.5951)  acc5: 87.2000 (91.5122)  time: 0.2097  data: 0.0459  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7020 (1.4069)  acc1: 65.6000 (71.9520)  acc5: 87.2000 (91.3040)  time: 0.1907  data: 0.0271  max mem: 27255
Test: Total time: 0:00:12 (0.2475 s / it)
* Acc@1 71.728 Acc@5 91.276 loss 1.405
Accuracy of the model on the 50000 test images: 71.7%
Max accuracy: 71.73%
Epoch: [43]  [   0/2502]  eta: 1:42:09  lr: 0.003934  min_lr: 0.003934  loss: 3.2257 (3.2257)  weight_decay: 0.0500 (0.0500)  time: 2.4497  data: 1.5113  max mem: 27255
Epoch: [43]  [ 200/2502]  eta: 0:13:15  lr: 0.003933  min_lr: 0.003933  loss: 4.2806 (3.9003)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5664 (0.6169)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [43]  [ 400/2502]  eta: 0:11:55  lr: 0.003933  min_lr: 0.003933  loss: 4.0913 (3.9125)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6621 (0.6466)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [43]  [ 600/2502]  eta: 0:10:44  lr: 0.003932  min_lr: 0.003932  loss: 4.1727 (3.8948)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5346 (0.6321)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [43]  [ 800/2502]  eta: 0:09:34  lr: 0.003932  min_lr: 0.003932  loss: 3.7671 (3.8891)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6175 (0.6324)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [43]  [1000/2502]  eta: 0:08:26  lr: 0.003931  min_lr: 0.003931  loss: 3.9996 (3.8898)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5997 (0.6291)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [43]  [1200/2502]  eta: 0:07:18  lr: 0.003931  min_lr: 0.003931  loss: 3.9296 (3.8708)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5717 (0.6289)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [43]  [1400/2502]  eta: 0:06:10  lr: 0.003931  min_lr: 0.003931  loss: 3.9968 (3.8684)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5728 (0.6265)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [43]  [1600/2502]  eta: 0:05:03  lr: 0.003930  min_lr: 0.003930  loss: 3.7633 (3.8580)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5992 (0.6273)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [43]  [1800/2502]  eta: 0:03:55  lr: 0.003930  min_lr: 0.003930  loss: 3.9517 (3.8536)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5752 (0.6253)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [43]  [2000/2502]  eta: 0:02:48  lr: 0.003929  min_lr: 0.003929  loss: 4.1043 (3.8573)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5726 (0.6254)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [43]  [2200/2502]  eta: 0:01:41  lr: 0.003929  min_lr: 0.003929  loss: 3.8151 (3.8521)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5846 (0.6234)  time: 0.3356  data: 0.0005  max mem: 27255
Epoch: [43]  [2400/2502]  eta: 0:00:34  lr: 0.003928  min_lr: 0.003928  loss: 3.8733 (3.8556)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6222 (0.6237)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [43]  [2501/2502]  eta: 0:00:00  lr: 0.003928  min_lr: 0.003928  loss: 3.9562 (3.8577)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6396 (0.6247)  time: 0.3001  data: 0.0009  max mem: 27255
Epoch: [43] Total time: 0:13:59 (0.3356 s / it)
Averaged stats: lr: 0.003928  min_lr: 0.003928  loss: 3.9562 (3.8539)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6396 (0.6247)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.7736 (0.7736)  acc1: 90.4000 (90.4000)  acc5: 96.8000 (96.8000)  time: 3.0433  data: 2.8581  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 1.1936 (1.1811)  acc1: 79.6000 (79.0545)  acc5: 95.6000 (94.8364)  time: 0.4757  data: 0.3100  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 1.2127 (1.2031)  acc1: 76.0000 (77.6571)  acc5: 95.6000 (95.0857)  time: 0.2448  data: 0.0811  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.3753 (1.3495)  acc1: 71.2000 (74.5806)  acc5: 91.2000 (93.1226)  time: 0.2383  data: 0.0747  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.7160 (1.4404)  acc1: 66.0000 (72.3902)  acc5: 87.2000 (91.7756)  time: 0.1887  data: 0.0213  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7160 (1.4658)  acc1: 66.0000 (71.6400)  acc5: 87.2000 (91.3520)  time: 0.1897  data: 0.0211  max mem: 27255
Test: Total time: 0:00:13 (0.2661 s / it)
* Acc@1 71.644 Acc@5 91.232 loss 1.467
Accuracy of the model on the 50000 test images: 71.6%
Max accuracy: 71.73%
Epoch: [44]  [   0/2502]  eta: 1:33:20  lr: 0.003928  min_lr: 0.003928  loss: 3.3717 (3.3717)  weight_decay: 0.0500 (0.0500)  time: 2.2383  data: 1.4406  max mem: 27255
Epoch: [44]  [ 200/2502]  eta: 0:13:11  lr: 0.003927  min_lr: 0.003927  loss: 3.9869 (3.8234)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5908 (0.6197)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [44]  [ 400/2502]  eta: 0:11:52  lr: 0.003927  min_lr: 0.003927  loss: 3.5399 (3.8097)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6195 (0.6478)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [44]  [ 600/2502]  eta: 0:10:41  lr: 0.003927  min_lr: 0.003927  loss: 3.7164 (3.8132)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5215 (0.6451)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [44]  [ 800/2502]  eta: 0:09:33  lr: 0.003926  min_lr: 0.003926  loss: 3.9750 (3.8172)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5747 (0.6388)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [44]  [1000/2502]  eta: 0:08:25  lr: 0.003926  min_lr: 0.003926  loss: 4.0722 (3.8289)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5890 (0.6378)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [44]  [1200/2502]  eta: 0:07:18  lr: 0.003925  min_lr: 0.003925  loss: 3.5266 (3.8456)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5980 (0.6323)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [44]  [1400/2502]  eta: 0:06:10  lr: 0.003925  min_lr: 0.003925  loss: 3.7461 (3.8407)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6022 (0.6324)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [44]  [1600/2502]  eta: 0:05:03  lr: 0.003924  min_lr: 0.003924  loss: 4.1436 (3.8427)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5575 (0.6280)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [44]  [1800/2502]  eta: 0:03:55  lr: 0.003924  min_lr: 0.003924  loss: 3.9300 (3.8392)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5901 (0.6255)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [44]  [2000/2502]  eta: 0:02:48  lr: 0.003923  min_lr: 0.003923  loss: 4.0366 (3.8351)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5734 (0.6266)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [44]  [2200/2502]  eta: 0:01:41  lr: 0.003923  min_lr: 0.003923  loss: 4.0540 (3.8437)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5915 (0.6275)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [44]  [2400/2502]  eta: 0:00:34  lr: 0.003922  min_lr: 0.003922  loss: 3.8798 (3.8451)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5724 (0.6263)  time: 0.3342  data: 0.0003  max mem: 27255
Epoch: [44]  [2501/2502]  eta: 0:00:00  lr: 0.003922  min_lr: 0.003922  loss: 3.8617 (3.8444)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7333 (0.6308)  time: 0.3017  data: 0.0007  max mem: 27255
Epoch: [44] Total time: 0:13:59 (0.3353 s / it)
Averaged stats: lr: 0.003922  min_lr: 0.003922  loss: 3.8617 (3.8463)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7333 (0.6308)
Test:  [ 0/50]  eta: 0:02:10  loss: 0.7972 (0.7972)  acc1: 89.2000 (89.2000)  acc5: 96.8000 (96.8000)  time: 2.6108  data: 2.4084  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.1086 (1.1193)  acc1: 79.6000 (78.8000)  acc5: 94.8000 (94.8727)  time: 0.4198  data: 0.2461  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1345 (1.1402)  acc1: 77.2000 (77.4476)  acc5: 95.2000 (95.0857)  time: 0.1884  data: 0.0210  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3792 (1.3086)  acc1: 71.2000 (74.5677)  acc5: 90.8000 (92.8258)  time: 0.2019  data: 0.0378  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6901 (1.4046)  acc1: 67.6000 (72.5268)  acc5: 87.6000 (91.4537)  time: 0.2142  data: 0.0502  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6726 (1.4346)  acc1: 67.2000 (72.0000)  acc5: 87.6000 (91.1520)  time: 0.1897  data: 0.0259  max mem: 27255
Test: Total time: 0:00:12 (0.2477 s / it)
* Acc@1 71.976 Acc@5 91.260 loss 1.434
Accuracy of the model on the 50000 test images: 72.0%
Max accuracy: 71.98%
Epoch: [45]  [   0/2502]  eta: 1:13:17  lr: 0.003922  min_lr: 0.003922  loss: 3.9966 (3.9966)  weight_decay: 0.0500 (0.0500)  time: 1.7574  data: 1.4188  max mem: 27255
Epoch: [45]  [ 200/2502]  eta: 0:13:07  lr: 0.003921  min_lr: 0.003921  loss: 3.8835 (3.8138)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5774 (0.6309)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [45]  [ 400/2502]  eta: 0:11:51  lr: 0.003921  min_lr: 0.003921  loss: 4.1917 (3.8244)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6587 (0.6395)  time: 0.3353  data: 0.0005  max mem: 27255
Epoch: [45]  [ 600/2502]  eta: 0:10:41  lr: 0.003920  min_lr: 0.003920  loss: 3.5943 (3.8283)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5268 (0.6233)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [45]  [ 800/2502]  eta: 0:09:32  lr: 0.003920  min_lr: 0.003920  loss: 4.1250 (3.8314)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6060 (0.6253)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [45]  [1000/2502]  eta: 0:08:24  lr: 0.003919  min_lr: 0.003919  loss: 3.8535 (3.8280)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5582 (0.6161)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [45]  [1200/2502]  eta: 0:07:17  lr: 0.003919  min_lr: 0.003919  loss: 3.9304 (3.8297)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5721 (0.6148)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [45]  [1400/2502]  eta: 0:06:09  lr: 0.003918  min_lr: 0.003918  loss: 4.0292 (3.8290)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5603 (0.6192)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [45]  [1600/2502]  eta: 0:05:02  lr: 0.003918  min_lr: 0.003918  loss: 3.7743 (3.8291)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5655 (0.6191)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [45]  [1800/2502]  eta: 0:03:55  lr: 0.003917  min_lr: 0.003917  loss: 3.8016 (3.8344)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5709 (0.6197)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [45]  [2000/2502]  eta: 0:02:48  lr: 0.003917  min_lr: 0.003917  loss: 3.9196 (3.8432)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5863 (0.6200)  time: 0.3407  data: 0.0006  max mem: 27255
Epoch: [45]  [2200/2502]  eta: 0:01:41  lr: 0.003916  min_lr: 0.003916  loss: 3.9967 (3.8491)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6747 (0.6251)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [45]  [2400/2502]  eta: 0:00:34  lr: 0.003916  min_lr: 0.003916  loss: 3.8654 (3.8520)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5843 (0.6233)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [45]  [2501/2502]  eta: 0:00:00  lr: 0.003916  min_lr: 0.003916  loss: 3.4745 (3.8524)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5731 (0.6228)  time: 0.3012  data: 0.0009  max mem: 27255
Epoch: [45] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003916  min_lr: 0.003916  loss: 3.4745 (3.8479)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5731 (0.6228)
Test:  [ 0/50]  eta: 0:02:11  loss: 0.7458 (0.7458)  acc1: 88.0000 (88.0000)  acc5: 97.6000 (97.6000)  time: 2.6332  data: 2.4362  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.0947 (1.1045)  acc1: 77.6000 (79.2000)  acc5: 94.8000 (94.6546)  time: 0.4542  data: 0.2874  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1218 (1.1172)  acc1: 76.4000 (78.0381)  acc5: 94.8000 (94.8000)  time: 0.2423  data: 0.0785  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2595 (1.2638)  acc1: 72.0000 (74.8645)  acc5: 91.2000 (92.9032)  time: 0.2181  data: 0.0513  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6547 (1.3601)  acc1: 66.4000 (72.4195)  acc5: 87.2000 (91.5512)  time: 0.1790  data: 0.0122  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6466 (1.3853)  acc1: 66.0000 (71.6560)  acc5: 88.0000 (91.3200)  time: 0.1672  data: 0.0033  max mem: 27255
Test: Total time: 0:00:12 (0.2523 s / it)
* Acc@1 71.556 Acc@5 91.348 loss 1.388
Accuracy of the model on the 50000 test images: 71.6%
Max accuracy: 71.98%
Epoch: [46]  [   0/2502]  eta: 1:21:27  lr: 0.003916  min_lr: 0.003916  loss: 4.3029 (4.3029)  weight_decay: 0.0500 (0.0500)  time: 1.9532  data: 1.5663  max mem: 27255
Epoch: [46]  [ 200/2502]  eta: 0:13:06  lr: 0.003915  min_lr: 0.003915  loss: 3.8282 (3.7438)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5693 (0.6161)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [46]  [ 400/2502]  eta: 0:11:49  lr: 0.003914  min_lr: 0.003914  loss: 3.7843 (3.8145)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6378 (0.6326)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [46]  [ 600/2502]  eta: 0:10:40  lr: 0.003914  min_lr: 0.003914  loss: 3.9017 (3.8412)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6118 (0.6371)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [46]  [ 800/2502]  eta: 0:09:31  lr: 0.003913  min_lr: 0.003913  loss: 3.8916 (3.8430)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5980 (0.6345)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [46]  [1000/2502]  eta: 0:08:23  lr: 0.003913  min_lr: 0.003913  loss: 4.1367 (3.8450)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6215 (0.6373)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [46]  [1200/2502]  eta: 0:07:16  lr: 0.003912  min_lr: 0.003912  loss: 4.0475 (3.8418)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6583 (0.6457)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [46]  [1400/2502]  eta: 0:06:09  lr: 0.003912  min_lr: 0.003912  loss: 3.7897 (3.8440)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6752 (0.6488)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [46]  [1600/2502]  eta: 0:05:02  lr: 0.003911  min_lr: 0.003911  loss: 3.9164 (3.8510)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5810 (0.6434)  time: 0.3406  data: 0.0005  max mem: 27255
Epoch: [46]  [1800/2502]  eta: 0:03:55  lr: 0.003911  min_lr: 0.003911  loss: 4.0205 (3.8563)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5804 (0.6398)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [46]  [2000/2502]  eta: 0:02:48  lr: 0.003910  min_lr: 0.003910  loss: 3.9409 (3.8561)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6568 (0.6422)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [46]  [2200/2502]  eta: 0:01:41  lr: 0.003910  min_lr: 0.003910  loss: 3.9448 (3.8574)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5763 (inf)  time: 0.3328  data: 0.0005  max mem: 27255
Epoch: [46]  [2400/2502]  eta: 0:00:34  lr: 0.003909  min_lr: 0.003909  loss: 4.0114 (3.8570)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7222 (inf)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [46]  [2501/2502]  eta: 0:00:00  lr: 0.003909  min_lr: 0.003909  loss: 3.7292 (3.8567)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6011 (inf)  time: 0.3000  data: 0.0007  max mem: 27255
Epoch: [46] Total time: 0:13:57 (0.3346 s / it)
Averaged stats: lr: 0.003909  min_lr: 0.003909  loss: 3.7292 (3.8418)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6011 (inf)
Test:  [ 0/50]  eta: 0:02:26  loss: 0.8537 (0.8537)  acc1: 89.6000 (89.6000)  acc5: 98.8000 (98.8000)  time: 2.9264  data: 2.7222  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.1631 (1.1243)  acc1: 78.4000 (78.3273)  acc5: 94.8000 (94.8727)  time: 0.4441  data: 0.2763  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0899 (1.0919)  acc1: 76.4000 (77.9048)  acc5: 94.8000 (94.9905)  time: 0.2344  data: 0.0704  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2397 (1.2436)  acc1: 72.8000 (75.2258)  acc5: 91.2000 (92.9419)  time: 0.2460  data: 0.0821  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6373 (1.3504)  acc1: 66.4000 (72.9171)  acc5: 86.8000 (91.5317)  time: 0.1929  data: 0.0292  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6373 (1.3762)  acc1: 65.6000 (72.1840)  acc5: 87.6000 (91.4240)  time: 0.1655  data: 0.0018  max mem: 27255
Test: Total time: 0:00:13 (0.2611 s / it)
* Acc@1 71.964 Acc@5 91.410 loss 1.384
Accuracy of the model on the 50000 test images: 72.0%
Max accuracy: 71.98%
Epoch: [47]  [   0/2502]  eta: 1:37:22  lr: 0.003909  min_lr: 0.003909  loss: 4.0321 (4.0321)  weight_decay: 0.0500 (0.0500)  time: 2.3350  data: 1.7314  max mem: 27255
Epoch: [47]  [ 200/2502]  eta: 0:13:10  lr: 0.003908  min_lr: 0.003908  loss: 3.8945 (3.8412)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5660 (0.6235)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [47]  [ 400/2502]  eta: 0:11:52  lr: 0.003908  min_lr: 0.003908  loss: 3.9748 (3.8477)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5994 (0.6327)  time: 0.3408  data: 0.0004  max mem: 27255
Epoch: [47]  [ 600/2502]  eta: 0:10:42  lr: 0.003907  min_lr: 0.003907  loss: 4.0255 (3.8430)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6259 (0.6351)  time: 0.3386  data: 0.0004  max mem: 27255
Epoch: [47]  [ 800/2502]  eta: 0:09:33  lr: 0.003907  min_lr: 0.003907  loss: 4.0649 (3.8437)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5300 (0.6190)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [47]  [1000/2502]  eta: 0:08:25  lr: 0.003906  min_lr: 0.003906  loss: 4.0389 (3.8466)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6028 (0.6260)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [47]  [1200/2502]  eta: 0:07:17  lr: 0.003906  min_lr: 0.003906  loss: 3.8994 (3.8420)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6083 (0.6231)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [47]  [1400/2502]  eta: 0:06:10  lr: 0.003905  min_lr: 0.003905  loss: 4.0931 (3.8309)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6188 (0.6257)  time: 0.3332  data: 0.0003  max mem: 27255
Epoch: [47]  [1600/2502]  eta: 0:05:02  lr: 0.003905  min_lr: 0.003905  loss: 3.9117 (3.8332)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5971 (0.6245)  time: 0.3421  data: 0.0004  max mem: 27255
Epoch: [47]  [1800/2502]  eta: 0:03:55  lr: 0.003904  min_lr: 0.003904  loss: 3.9644 (3.8376)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6218 (0.6272)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [47]  [2000/2502]  eta: 0:02:48  lr: 0.003904  min_lr: 0.003904  loss: 3.8639 (3.8362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5527 (0.6284)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [47]  [2200/2502]  eta: 0:01:41  lr: 0.003903  min_lr: 0.003903  loss: 3.9187 (3.8338)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5786 (0.6287)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [47]  [2400/2502]  eta: 0:00:34  lr: 0.003902  min_lr: 0.003902  loss: 3.8936 (3.8316)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5808 (0.6269)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [47]  [2501/2502]  eta: 0:00:00  lr: 0.003902  min_lr: 0.003902  loss: 3.9784 (3.8321)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5957 (0.6268)  time: 0.3003  data: 0.0007  max mem: 27255
Epoch: [47] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.003902  min_lr: 0.003902  loss: 3.9784 (3.8284)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5957 (0.6268)
Test:  [ 0/50]  eta: 0:03:02  loss: 0.8557 (0.8557)  acc1: 89.6000 (89.6000)  acc5: 98.0000 (98.0000)  time: 3.6524  data: 3.4596  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 1.1331 (1.1635)  acc1: 79.2000 (79.1273)  acc5: 96.0000 (95.3455)  time: 0.5038  data: 0.3370  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1729 (1.1848)  acc1: 76.4000 (77.9810)  acc5: 96.0000 (95.3714)  time: 0.1833  data: 0.0193  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3519 (1.3113)  acc1: 71.2000 (75.0194)  acc5: 91.2000 (93.1871)  time: 0.1841  data: 0.0199  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6323 (1.3888)  acc1: 67.6000 (73.0732)  acc5: 87.6000 (91.8146)  time: 0.1785  data: 0.0131  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6270 (1.4043)  acc1: 67.6000 (72.2320)  acc5: 88.0000 (91.6640)  time: 0.1782  data: 0.0129  max mem: 27255
Test: Total time: 0:00:12 (0.2493 s / it)
* Acc@1 72.268 Acc@5 91.554 loss 1.411
Accuracy of the model on the 50000 test images: 72.3%
Max accuracy: 72.27%
Epoch: [48]  [   0/2502]  eta: 1:22:46  lr: 0.003902  min_lr: 0.003902  loss: 2.6439 (2.6439)  weight_decay: 0.0500 (0.0500)  time: 1.9848  data: 1.6282  max mem: 27255
Epoch: [48]  [ 200/2502]  eta: 0:13:07  lr: 0.003902  min_lr: 0.003902  loss: 3.9666 (3.7766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6112 (0.6695)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [48]  [ 400/2502]  eta: 0:11:49  lr: 0.003901  min_lr: 0.003901  loss: 4.0789 (3.8006)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5981 (0.6608)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [48]  [ 600/2502]  eta: 0:10:40  lr: 0.003900  min_lr: 0.003900  loss: 4.0648 (3.8097)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6116 (0.6504)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [48]  [ 800/2502]  eta: 0:09:32  lr: 0.003900  min_lr: 0.003900  loss: 4.0545 (3.8338)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6411 (0.6532)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [48]  [1000/2502]  eta: 0:08:24  lr: 0.003899  min_lr: 0.003899  loss: 4.1180 (3.8426)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5990 (0.6492)  time: 0.3348  data: 0.0003  max mem: 27255
Epoch: [48]  [1200/2502]  eta: 0:07:16  lr: 0.003899  min_lr: 0.003899  loss: 4.0741 (3.8366)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6827 (0.6498)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [48]  [1400/2502]  eta: 0:06:09  lr: 0.003898  min_lr: 0.003898  loss: 3.7877 (3.8415)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5911 (0.6454)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [48]  [1600/2502]  eta: 0:05:02  lr: 0.003898  min_lr: 0.003898  loss: 4.0089 (3.8460)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6539 (0.6460)  time: 0.3330  data: 0.0003  max mem: 27255
Epoch: [48]  [1800/2502]  eta: 0:03:55  lr: 0.003897  min_lr: 0.003897  loss: 4.0764 (3.8455)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5556 (0.6418)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [48]  [2000/2502]  eta: 0:02:48  lr: 0.003897  min_lr: 0.003897  loss: 3.6207 (3.8437)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5731 (0.6365)  time: 0.3351  data: 0.0003  max mem: 27255
Epoch: [48]  [2200/2502]  eta: 0:01:41  lr: 0.003896  min_lr: 0.003896  loss: 3.9425 (3.8414)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6525 (0.6404)  time: 0.3340  data: 0.0003  max mem: 27255
Epoch: [48]  [2400/2502]  eta: 0:00:34  lr: 0.003895  min_lr: 0.003895  loss: 3.8428 (3.8368)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6156 (0.6443)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [48]  [2501/2502]  eta: 0:00:00  lr: 0.003895  min_lr: 0.003895  loss: 3.7948 (3.8343)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5253 (0.6415)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [48] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.003895  min_lr: 0.003895  loss: 3.7948 (3.8262)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5253 (0.6415)
Test:  [ 0/50]  eta: 0:02:11  loss: 0.7958 (0.7958)  acc1: 92.0000 (92.0000)  acc5: 98.0000 (98.0000)  time: 2.6373  data: 2.4403  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.0080 (1.1022)  acc1: 78.4000 (78.7636)  acc5: 95.2000 (94.7636)  time: 0.4058  data: 0.2390  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0730 (1.1055)  acc1: 77.2000 (78.0000)  acc5: 95.2000 (95.0667)  time: 0.2422  data: 0.0780  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2631 (1.2381)  acc1: 70.8000 (75.1484)  acc5: 91.6000 (92.9677)  time: 0.2485  data: 0.0843  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5704 (1.3313)  acc1: 66.4000 (72.9756)  acc5: 87.6000 (91.8244)  time: 0.1938  data: 0.0301  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5476 (1.3515)  acc1: 66.0000 (72.1360)  acc5: 88.8000 (91.6240)  time: 0.1781  data: 0.0145  max mem: 27255
Test: Total time: 0:00:12 (0.2583 s / it)
* Acc@1 72.238 Acc@5 91.660 loss 1.352
Accuracy of the model on the 50000 test images: 72.2%
Max accuracy: 72.27%
Epoch: [49]  [   0/2502]  eta: 1:26:23  lr: 0.003895  min_lr: 0.003895  loss: 3.7054 (3.7054)  weight_decay: 0.0500 (0.0500)  time: 2.0716  data: 1.7137  max mem: 27255
Epoch: [49]  [ 200/2502]  eta: 0:13:08  lr: 0.003895  min_lr: 0.003895  loss: 4.1258 (3.8144)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6329 (0.6625)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [49]  [ 400/2502]  eta: 0:11:52  lr: 0.003894  min_lr: 0.003894  loss: 4.0301 (3.8178)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6119 (0.6353)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [49]  [ 600/2502]  eta: 0:10:41  lr: 0.003893  min_lr: 0.003893  loss: 3.7845 (3.8205)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5843 (0.6396)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [49]  [ 800/2502]  eta: 0:09:32  lr: 0.003893  min_lr: 0.003893  loss: 3.9872 (3.8062)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6423 (0.6338)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [49]  [1000/2502]  eta: 0:08:24  lr: 0.003892  min_lr: 0.003892  loss: 4.0181 (3.8027)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5491 (0.6244)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [49]  [1200/2502]  eta: 0:07:17  lr: 0.003892  min_lr: 0.003892  loss: 3.7672 (3.8136)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5770 (0.6254)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [49]  [1400/2502]  eta: 0:06:09  lr: 0.003891  min_lr: 0.003891  loss: 3.9833 (3.8137)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6303 (0.6308)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [49]  [1600/2502]  eta: 0:05:02  lr: 0.003890  min_lr: 0.003890  loss: 4.1510 (3.8073)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5898 (0.6286)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [49]  [1800/2502]  eta: 0:03:55  lr: 0.003890  min_lr: 0.003890  loss: 3.9883 (3.8124)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6037 (0.6306)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [49]  [2000/2502]  eta: 0:02:48  lr: 0.003889  min_lr: 0.003889  loss: 4.0895 (3.8107)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6007 (0.6311)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [49]  [2200/2502]  eta: 0:01:41  lr: 0.003889  min_lr: 0.003889  loss: 3.7920 (3.8128)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5933 (0.6310)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [49]  [2400/2502]  eta: 0:00:34  lr: 0.003888  min_lr: 0.003888  loss: 3.5775 (3.8165)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5563 (0.6288)  time: 0.3332  data: 0.0003  max mem: 27255
Epoch: [49]  [2501/2502]  eta: 0:00:00  lr: 0.003888  min_lr: 0.003888  loss: 3.9941 (3.8181)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6203 (0.6288)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [49] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003888  min_lr: 0.003888  loss: 3.9941 (3.8096)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6203 (0.6288)
Test:  [ 0/50]  eta: 0:02:08  loss: 0.9286 (0.9286)  acc1: 91.2000 (91.2000)  acc5: 96.8000 (96.8000)  time: 2.5601  data: 2.3560  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.1774 (1.1702)  acc1: 81.2000 (79.1273)  acc5: 96.0000 (95.2727)  time: 0.4132  data: 0.2457  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1812 (1.1820)  acc1: 76.8000 (78.0762)  acc5: 96.0000 (95.1619)  time: 0.2095  data: 0.0456  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3741 (1.3257)  acc1: 72.4000 (74.8645)  acc5: 92.4000 (93.1871)  time: 0.2070  data: 0.0431  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6642 (1.4098)  acc1: 66.4000 (72.9268)  acc5: 87.2000 (91.7171)  time: 0.2012  data: 0.0375  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6602 (1.4155)  acc1: 66.4000 (72.4320)  acc5: 87.6000 (91.5360)  time: 0.2026  data: 0.0390  max mem: 27255
Test: Total time: 0:00:12 (0.2534 s / it)
* Acc@1 72.256 Acc@5 91.596 loss 1.418
Accuracy of the model on the 50000 test images: 72.3%
Max accuracy: 72.27%
Epoch: [50]  [   0/2502]  eta: 1:17:36  lr: 0.003888  min_lr: 0.003888  loss: 4.2255 (4.2255)  weight_decay: 0.0500 (0.0500)  time: 1.8610  data: 1.3175  max mem: 27255
Epoch: [50]  [ 200/2502]  eta: 0:13:08  lr: 0.003887  min_lr: 0.003887  loss: 4.0068 (3.8522)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5628 (0.6469)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [50]  [ 400/2502]  eta: 0:11:52  lr: 0.003887  min_lr: 0.003887  loss: 3.8994 (3.7927)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6349 (0.6348)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [50]  [ 600/2502]  eta: 0:10:41  lr: 0.003886  min_lr: 0.003886  loss: 4.0694 (3.8103)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6137 (0.6389)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [50]  [ 800/2502]  eta: 0:09:33  lr: 0.003885  min_lr: 0.003885  loss: 4.0246 (3.8228)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6314 (0.6364)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [50]  [1000/2502]  eta: 0:08:25  lr: 0.003885  min_lr: 0.003885  loss: 3.8919 (3.8065)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5873 (inf)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [50]  [1200/2502]  eta: 0:07:17  lr: 0.003884  min_lr: 0.003884  loss: 3.8704 (3.8077)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6198 (inf)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [50]  [1400/2502]  eta: 0:06:09  lr: 0.003884  min_lr: 0.003884  loss: 3.9899 (3.8052)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5957 (inf)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [50]  [1600/2502]  eta: 0:05:02  lr: 0.003883  min_lr: 0.003883  loss: 3.4305 (3.8109)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6120 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [50]  [1800/2502]  eta: 0:03:55  lr: 0.003882  min_lr: 0.003882  loss: 3.9101 (3.8114)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5572 (inf)  time: 0.3353  data: 0.0005  max mem: 27255
Epoch: [50]  [2000/2502]  eta: 0:02:48  lr: 0.003882  min_lr: 0.003882  loss: 3.9904 (3.8108)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6149 (inf)  time: 0.3364  data: 0.0004  max mem: 27255
Epoch: [50]  [2200/2502]  eta: 0:01:41  lr: 0.003881  min_lr: 0.003881  loss: 3.9552 (3.8129)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6442 (inf)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [50]  [2400/2502]  eta: 0:00:34  lr: 0.003881  min_lr: 0.003881  loss: 4.0756 (3.8141)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6983 (inf)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [50]  [2501/2502]  eta: 0:00:00  lr: 0.003880  min_lr: 0.003880  loss: 3.7784 (3.8126)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6268 (inf)  time: 0.3014  data: 0.0009  max mem: 27255
Epoch: [50] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003880  min_lr: 0.003880  loss: 3.7784 (3.8079)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6268 (inf)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.7650 (0.7650)  acc1: 88.0000 (88.0000)  acc5: 98.8000 (98.8000)  time: 2.8118  data: 2.6070  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.1120 (1.1221)  acc1: 80.4000 (79.1636)  acc5: 95.6000 (95.3455)  time: 0.4454  data: 0.2778  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1134 (1.1262)  acc1: 78.0000 (78.2476)  acc5: 95.6000 (95.4476)  time: 0.1892  data: 0.0241  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3551 (1.2596)  acc1: 70.4000 (75.2903)  acc5: 91.2000 (93.3161)  time: 0.1792  data: 0.0141  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6091 (1.3467)  acc1: 67.2000 (72.9854)  acc5: 88.0000 (91.9902)  time: 0.2124  data: 0.0486  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5796 (1.3683)  acc1: 65.6000 (72.3200)  acc5: 88.0000 (91.7520)  time: 0.1999  data: 0.0363  max mem: 27255
Test: Total time: 0:00:12 (0.2484 s / it)
* Acc@1 72.330 Acc@5 91.568 loss 1.381
Accuracy of the model on the 50000 test images: 72.3%
Max accuracy: 72.33%
Epoch: [51]  [   0/2502]  eta: 1:16:27  lr: 0.003880  min_lr: 0.003880  loss: 3.8257 (3.8257)  weight_decay: 0.0500 (0.0500)  time: 1.8334  data: 1.4771  max mem: 27255
Epoch: [51]  [ 200/2502]  eta: 0:13:05  lr: 0.003880  min_lr: 0.003880  loss: 3.8705 (3.7809)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5598 (0.5991)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [51]  [ 400/2502]  eta: 0:11:50  lr: 0.003879  min_lr: 0.003879  loss: 4.1129 (3.8027)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5874 (0.6080)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [51]  [ 600/2502]  eta: 0:10:40  lr: 0.003878  min_lr: 0.003878  loss: 3.5798 (3.8099)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5904 (0.6076)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [51]  [ 800/2502]  eta: 0:09:31  lr: 0.003878  min_lr: 0.003878  loss: 3.5406 (3.8008)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5923 (0.6166)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [51]  [1000/2502]  eta: 0:08:23  lr: 0.003877  min_lr: 0.003877  loss: 4.0544 (3.8093)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5949 (0.6199)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [51]  [1200/2502]  eta: 0:07:16  lr: 0.003877  min_lr: 0.003877  loss: 3.8114 (3.8111)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5739 (0.6253)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [51]  [1400/2502]  eta: 0:06:09  lr: 0.003876  min_lr: 0.003876  loss: 3.7213 (3.8024)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6167 (0.6257)  time: 0.3358  data: 0.0005  max mem: 27255
Epoch: [51]  [1600/2502]  eta: 0:05:02  lr: 0.003875  min_lr: 0.003875  loss: 3.7813 (3.8048)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6231 (0.6255)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [51]  [1800/2502]  eta: 0:03:55  lr: 0.003875  min_lr: 0.003875  loss: 3.8872 (3.7999)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5966 (0.6287)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [51]  [2000/2502]  eta: 0:02:48  lr: 0.003874  min_lr: 0.003874  loss: 4.1120 (3.8000)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6092 (0.6313)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [51]  [2200/2502]  eta: 0:01:41  lr: 0.003873  min_lr: 0.003873  loss: 4.2012 (3.7992)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7319 (0.6345)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [51]  [2400/2502]  eta: 0:00:34  lr: 0.003873  min_lr: 0.003873  loss: 3.8874 (3.8025)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6629 (0.6363)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [51]  [2501/2502]  eta: 0:00:00  lr: 0.003873  min_lr: 0.003873  loss: 3.8486 (3.8032)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5879 (0.6368)  time: 0.3006  data: 0.0007  max mem: 27255
Epoch: [51] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.003873  min_lr: 0.003873  loss: 3.8486 (3.8072)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5879 (0.6368)
Test:  [ 0/50]  eta: 0:02:18  loss: 0.8083 (0.8083)  acc1: 89.6000 (89.6000)  acc5: 98.0000 (98.0000)  time: 2.7758  data: 2.5804  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.1662 (1.1415)  acc1: 80.4000 (79.0545)  acc5: 95.2000 (95.4545)  time: 0.4205  data: 0.2537  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1662 (1.1428)  acc1: 78.0000 (78.4191)  acc5: 95.2000 (95.3905)  time: 0.1938  data: 0.0287  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3338 (1.3004)  acc1: 70.8000 (75.1742)  acc5: 90.4000 (93.2129)  time: 0.2266  data: 0.0613  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.7262 (1.3934)  acc1: 68.0000 (73.1610)  acc5: 88.0000 (91.9805)  time: 0.2240  data: 0.0600  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6803 (1.4101)  acc1: 67.6000 (72.5840)  acc5: 88.8000 (91.7440)  time: 0.1822  data: 0.0186  max mem: 27255
Test: Total time: 0:00:12 (0.2544 s / it)
* Acc@1 72.464 Acc@5 91.702 loss 1.412
Accuracy of the model on the 50000 test images: 72.5%
Max accuracy: 72.46%
Epoch: [52]  [   0/2502]  eta: 1:27:40  lr: 0.003873  min_lr: 0.003873  loss: 3.4542 (3.4542)  weight_decay: 0.0500 (0.0500)  time: 2.1025  data: 1.7492  max mem: 27255
Epoch: [52]  [ 200/2502]  eta: 0:13:11  lr: 0.003872  min_lr: 0.003872  loss: 3.9585 (3.7590)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6077 (0.6259)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [52]  [ 400/2502]  eta: 0:11:53  lr: 0.003871  min_lr: 0.003871  loss: 3.9833 (3.7940)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6109 (0.6150)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [52]  [ 600/2502]  eta: 0:10:42  lr: 0.003871  min_lr: 0.003871  loss: 4.1289 (3.7828)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6253 (0.6214)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [52]  [ 800/2502]  eta: 0:09:33  lr: 0.003870  min_lr: 0.003870  loss: 3.7159 (3.7784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5740 (0.6283)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [52]  [1000/2502]  eta: 0:08:25  lr: 0.003869  min_lr: 0.003869  loss: 4.0841 (3.7802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6317 (0.6276)  time: 0.3353  data: 0.0005  max mem: 27255
Epoch: [52]  [1200/2502]  eta: 0:07:17  lr: 0.003869  min_lr: 0.003869  loss: 3.7128 (3.7745)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6597 (0.6393)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [52]  [1400/2502]  eta: 0:06:10  lr: 0.003868  min_lr: 0.003868  loss: 3.7989 (3.7829)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6185 (0.6408)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [52]  [1600/2502]  eta: 0:05:03  lr: 0.003867  min_lr: 0.003867  loss: 4.1239 (3.7763)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5986 (0.6393)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [52]  [1800/2502]  eta: 0:03:55  lr: 0.003867  min_lr: 0.003867  loss: 3.8679 (3.7733)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5986 (0.6411)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [52]  [2000/2502]  eta: 0:02:48  lr: 0.003866  min_lr: 0.003866  loss: 3.7615 (3.7716)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5532 (0.6364)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [52]  [2200/2502]  eta: 0:01:41  lr: 0.003865  min_lr: 0.003865  loss: 4.1242 (3.7780)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5533 (0.6351)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [52]  [2400/2502]  eta: 0:00:34  lr: 0.003865  min_lr: 0.003865  loss: 3.8118 (3.7785)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6211 (0.6337)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [52]  [2501/2502]  eta: 0:00:00  lr: 0.003865  min_lr: 0.003865  loss: 4.0188 (3.7807)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5872 (0.6334)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [52] Total time: 0:14:00 (0.3358 s / it)
Averaged stats: lr: 0.003865  min_lr: 0.003865  loss: 4.0188 (3.7967)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5872 (0.6334)
Test:  [ 0/50]  eta: 0:02:24  loss: 0.8700 (0.8700)  acc1: 88.4000 (88.4000)  acc5: 97.2000 (97.2000)  time: 2.8836  data: 2.6837  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.0890 (1.1546)  acc1: 80.8000 (79.6000)  acc5: 96.0000 (95.6727)  time: 0.4429  data: 0.2751  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 1.1064 (1.1651)  acc1: 78.8000 (79.3333)  acc5: 96.0000 (95.6571)  time: 0.2506  data: 0.0864  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.4006 (1.3110)  acc1: 74.0000 (75.8968)  acc5: 91.2000 (93.4194)  time: 0.2553  data: 0.0907  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 1.6388 (1.3960)  acc1: 66.8000 (73.6098)  acc5: 87.6000 (92.2732)  time: 0.2263  data: 0.0619  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6223 (1.4129)  acc1: 67.2000 (73.0560)  acc5: 88.4000 (91.9520)  time: 0.2041  data: 0.0405  max mem: 27255
Test: Total time: 0:00:13 (0.2798 s / it)
* Acc@1 72.496 Acc@5 91.732 loss 1.422
Accuracy of the model on the 50000 test images: 72.5%
Max accuracy: 72.50%
Epoch: [53]  [   0/2502]  eta: 1:21:22  lr: 0.003865  min_lr: 0.003865  loss: 4.3531 (4.3531)  weight_decay: 0.0500 (0.0500)  time: 1.9516  data: 1.5893  max mem: 27255
Epoch: [53]  [ 200/2502]  eta: 0:13:06  lr: 0.003864  min_lr: 0.003864  loss: 3.3781 (3.7486)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5887 (0.6128)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [53]  [ 400/2502]  eta: 0:11:51  lr: 0.003863  min_lr: 0.003863  loss: 4.0802 (3.7879)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5812 (0.6161)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [53]  [ 600/2502]  eta: 0:10:41  lr: 0.003863  min_lr: 0.003863  loss: 3.9763 (3.7997)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6083 (0.6203)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [53]  [ 800/2502]  eta: 0:09:33  lr: 0.003862  min_lr: 0.003862  loss: 4.0977 (3.8113)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5946 (0.6254)  time: 0.3360  data: 0.0004  max mem: 27255
Epoch: [53]  [1000/2502]  eta: 0:08:25  lr: 0.003861  min_lr: 0.003861  loss: 4.0945 (3.8071)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6318 (0.6344)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [53]  [1200/2502]  eta: 0:07:17  lr: 0.003861  min_lr: 0.003861  loss: 3.7350 (3.8060)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6832 (0.6427)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [53]  [1400/2502]  eta: 0:06:09  lr: 0.003860  min_lr: 0.003860  loss: 3.6571 (3.7956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6900 (0.6453)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [53]  [1600/2502]  eta: 0:05:02  lr: 0.003859  min_lr: 0.003859  loss: 4.0239 (3.7934)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6531 (inf)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [53]  [1800/2502]  eta: 0:03:55  lr: 0.003859  min_lr: 0.003859  loss: 3.7885 (3.7958)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5987 (inf)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [53]  [2000/2502]  eta: 0:02:48  lr: 0.003858  min_lr: 0.003858  loss: 4.0156 (3.7948)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5637 (inf)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [53]  [2200/2502]  eta: 0:01:41  lr: 0.003857  min_lr: 0.003857  loss: 3.7795 (3.7923)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5714 (inf)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [53]  [2400/2502]  eta: 0:00:34  lr: 0.003857  min_lr: 0.003857  loss: 3.9999 (3.7959)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6460 (inf)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [53]  [2501/2502]  eta: 0:00:00  lr: 0.003856  min_lr: 0.003856  loss: 3.8832 (3.7946)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6117 (inf)  time: 0.2999  data: 0.0009  max mem: 27255
Epoch: [53] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.003856  min_lr: 0.003856  loss: 3.8832 (3.7934)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6117 (inf)
Test:  [ 0/50]  eta: 0:02:09  loss: 0.8873 (0.8873)  acc1: 84.4000 (84.4000)  acc5: 96.8000 (96.8000)  time: 2.5825  data: 2.3866  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.1131 (1.1126)  acc1: 78.0000 (79.0909)  acc5: 95.2000 (95.0182)  time: 0.4668  data: 0.3001  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1145 (1.1290)  acc1: 76.8000 (78.2857)  acc5: 95.2000 (95.2762)  time: 0.2157  data: 0.0519  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3857 (1.2835)  acc1: 73.2000 (75.0839)  acc5: 92.4000 (93.2387)  time: 0.1917  data: 0.0278  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6705 (1.3720)  acc1: 66.8000 (73.0341)  acc5: 88.4000 (92.0976)  time: 0.2044  data: 0.0407  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6182 (1.3919)  acc1: 67.2000 (72.5120)  acc5: 88.8000 (91.8320)  time: 0.1828  data: 0.0191  max mem: 27255
Test: Total time: 0:00:12 (0.2509 s / it)
* Acc@1 72.628 Acc@5 91.778 loss 1.399
Accuracy of the model on the 50000 test images: 72.6%
Max accuracy: 72.63%
Epoch: [54]  [   0/2502]  eta: 1:32:43  lr: 0.003856  min_lr: 0.003856  loss: 3.1036 (3.1036)  weight_decay: 0.0500 (0.0500)  time: 2.2236  data: 1.8697  max mem: 27255
Epoch: [54]  [ 200/2502]  eta: 0:13:14  lr: 0.003856  min_lr: 0.003856  loss: 3.7262 (3.7611)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6434 (0.6342)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [54]  [ 400/2502]  eta: 0:11:53  lr: 0.003855  min_lr: 0.003855  loss: 3.9134 (3.7989)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5124 (0.6352)  time: 0.3332  data: 0.0003  max mem: 27255
Epoch: [54]  [ 600/2502]  eta: 0:10:42  lr: 0.003854  min_lr: 0.003854  loss: 3.8503 (3.7912)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6023 (0.6452)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [54]  [ 800/2502]  eta: 0:09:33  lr: 0.003854  min_lr: 0.003854  loss: 3.5808 (3.8076)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5918 (0.6374)  time: 0.3362  data: 0.0005  max mem: 27255
Epoch: [54]  [1000/2502]  eta: 0:08:25  lr: 0.003853  min_lr: 0.003853  loss: 3.7674 (3.8133)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6511 (0.6398)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [54]  [1200/2502]  eta: 0:07:17  lr: 0.003852  min_lr: 0.003852  loss: 4.0084 (3.8122)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5618 (0.6404)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [54]  [1400/2502]  eta: 0:06:10  lr: 0.003852  min_lr: 0.003852  loss: 3.8954 (3.8135)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5892 (0.6411)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [54]  [1600/2502]  eta: 0:05:02  lr: 0.003851  min_lr: 0.003851  loss: 3.8876 (3.8090)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6452 (inf)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [54]  [1800/2502]  eta: 0:03:55  lr: 0.003850  min_lr: 0.003850  loss: 3.7646 (3.8008)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6960 (inf)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [54]  [2000/2502]  eta: 0:02:48  lr: 0.003850  min_lr: 0.003850  loss: 4.0555 (3.8033)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6108 (inf)  time: 0.3355  data: 0.0005  max mem: 27255
Epoch: [54]  [2200/2502]  eta: 0:01:41  lr: 0.003849  min_lr: 0.003849  loss: 3.6464 (3.8034)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6739 (inf)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [54]  [2400/2502]  eta: 0:00:34  lr: 0.003848  min_lr: 0.003848  loss: 3.7581 (3.7999)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5881 (inf)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [54]  [2501/2502]  eta: 0:00:00  lr: 0.003848  min_lr: 0.003848  loss: 3.9074 (3.8016)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6209 (inf)  time: 0.2997  data: 0.0009  max mem: 27255
Epoch: [54] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.003848  min_lr: 0.003848  loss: 3.9074 (3.7914)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6209 (inf)
Test:  [ 0/50]  eta: 0:02:21  loss: 0.8136 (0.8136)  acc1: 89.6000 (89.6000)  acc5: 98.4000 (98.4000)  time: 2.8284  data: 2.6326  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.1786 (1.1601)  acc1: 78.8000 (78.9455)  acc5: 95.6000 (95.1273)  time: 0.4263  data: 0.2596  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1786 (1.1771)  acc1: 76.0000 (77.9619)  acc5: 95.6000 (95.3333)  time: 0.2089  data: 0.0441  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.3541 (1.3123)  acc1: 72.0000 (75.0968)  acc5: 91.6000 (93.1613)  time: 0.2478  data: 0.0823  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 1.6380 (1.3898)  acc1: 68.0000 (73.2683)  acc5: 87.6000 (91.9512)  time: 0.2680  data: 0.1037  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6323 (1.4025)  acc1: 68.0000 (72.6160)  acc5: 88.0000 (91.8000)  time: 0.2291  data: 0.0657  max mem: 27255
Test: Total time: 0:00:14 (0.2830 s / it)
* Acc@1 72.808 Acc@5 91.872 loss 1.402
Accuracy of the model on the 50000 test images: 72.8%
Max accuracy: 72.81%
Epoch: [55]  [   0/2502]  eta: 1:11:32  lr: 0.003848  min_lr: 0.003848  loss: 3.2974 (3.2974)  weight_decay: 0.0500 (0.0500)  time: 1.7158  data: 1.3698  max mem: 27255
Epoch: [55]  [ 200/2502]  eta: 0:13:07  lr: 0.003847  min_lr: 0.003847  loss: 3.8224 (3.7480)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6957 (0.6986)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [55]  [ 400/2502]  eta: 0:11:51  lr: 0.003846  min_lr: 0.003846  loss: 3.8734 (3.7737)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5216 (0.6469)  time: 0.3412  data: 0.0004  max mem: 27255
Epoch: [55]  [ 600/2502]  eta: 0:10:40  lr: 0.003846  min_lr: 0.003846  loss: 3.7974 (3.7745)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5939 (0.6258)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [55]  [ 800/2502]  eta: 0:09:32  lr: 0.003845  min_lr: 0.003845  loss: 3.5802 (3.7789)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6597 (0.6399)  time: 0.3355  data: 0.0005  max mem: 27255
Epoch: [55]  [1000/2502]  eta: 0:08:24  lr: 0.003844  min_lr: 0.003844  loss: 3.6659 (3.7641)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6290 (0.6436)  time: 0.3329  data: 0.0005  max mem: 27255
Epoch: [55]  [1200/2502]  eta: 0:07:16  lr: 0.003844  min_lr: 0.003844  loss: 3.6526 (3.7571)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6478 (0.6393)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [55]  [1400/2502]  eta: 0:06:09  lr: 0.003843  min_lr: 0.003843  loss: 3.4704 (3.7535)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5596 (0.6420)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [55]  [1600/2502]  eta: 0:05:02  lr: 0.003842  min_lr: 0.003842  loss: 3.7357 (3.7546)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6563 (0.6480)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [55]  [1800/2502]  eta: 0:03:55  lr: 0.003842  min_lr: 0.003842  loss: 3.8423 (3.7591)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5948 (0.6479)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [55]  [2000/2502]  eta: 0:02:48  lr: 0.003841  min_lr: 0.003841  loss: 3.9369 (3.7634)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6116 (0.6468)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [55]  [2200/2502]  eta: 0:01:41  lr: 0.003840  min_lr: 0.003840  loss: 3.8343 (3.7628)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5829 (0.6455)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [55]  [2400/2502]  eta: 0:00:34  lr: 0.003839  min_lr: 0.003839  loss: 3.9439 (3.7692)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6027 (0.6442)  time: 0.3328  data: 0.0005  max mem: 27255
Epoch: [55]  [2501/2502]  eta: 0:00:00  lr: 0.003839  min_lr: 0.003839  loss: 3.9381 (3.7676)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5958 (0.6437)  time: 0.3001  data: 0.0011  max mem: 27255
Epoch: [55] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.003839  min_lr: 0.003839  loss: 3.9381 (3.7755)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5958 (0.6437)
Test:  [ 0/50]  eta: 0:02:19  loss: 0.7973 (0.7973)  acc1: 90.0000 (90.0000)  acc5: 97.2000 (97.2000)  time: 2.7892  data: 2.5980  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 1.1030 (1.1063)  acc1: 80.8000 (80.1091)  acc5: 96.0000 (95.2000)  time: 0.4789  data: 0.3125  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1035 (1.1212)  acc1: 78.8000 (78.8571)  acc5: 96.0000 (95.4857)  time: 0.2307  data: 0.0668  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.3410 (1.2651)  acc1: 71.6000 (75.8968)  acc5: 92.4000 (93.2645)  time: 0.2032  data: 0.0393  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6557 (1.3551)  acc1: 67.6000 (73.7951)  acc5: 87.2000 (91.9415)  time: 0.1783  data: 0.0144  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5981 (1.3765)  acc1: 67.6000 (73.1280)  acc5: 87.6000 (91.8400)  time: 0.1781  data: 0.0143  max mem: 27255
Test: Total time: 0:00:12 (0.2505 s / it)
* Acc@1 72.988 Acc@5 91.976 loss 1.382
Accuracy of the model on the 50000 test images: 73.0%
Max accuracy: 72.99%
Epoch: [56]  [   0/2502]  eta: 1:16:55  lr: 0.003839  min_lr: 0.003839  loss: 2.4520 (2.4520)  weight_decay: 0.0500 (0.0500)  time: 1.8446  data: 1.4964  max mem: 27255
Epoch: [56]  [ 200/2502]  eta: 0:13:08  lr: 0.003838  min_lr: 0.003838  loss: 3.8811 (3.7633)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6068 (0.6432)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [56]  [ 400/2502]  eta: 0:11:52  lr: 0.003838  min_lr: 0.003838  loss: 3.8734 (3.7589)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5571 (0.6319)  time: 0.3360  data: 0.0005  max mem: 27255
Epoch: [56]  [ 600/2502]  eta: 0:10:41  lr: 0.003837  min_lr: 0.003837  loss: 3.6250 (3.7658)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5917 (0.6389)  time: 0.3356  data: 0.0005  max mem: 27255
Epoch: [56]  [ 800/2502]  eta: 0:09:33  lr: 0.003836  min_lr: 0.003836  loss: 3.6412 (3.7669)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6389 (0.6377)  time: 0.3369  data: 0.0005  max mem: 27255
Epoch: [56]  [1000/2502]  eta: 0:08:25  lr: 0.003836  min_lr: 0.003836  loss: 3.4933 (3.7639)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7435 (0.6400)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [56]  [1200/2502]  eta: 0:07:17  lr: 0.003835  min_lr: 0.003835  loss: 4.0589 (3.7645)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5860 (0.6393)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [56]  [1400/2502]  eta: 0:06:10  lr: 0.003834  min_lr: 0.003834  loss: 3.8088 (3.7494)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6343 (0.6401)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [56]  [1600/2502]  eta: 0:05:02  lr: 0.003833  min_lr: 0.003833  loss: 3.9718 (3.7554)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6094 (0.6426)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [56]  [1800/2502]  eta: 0:03:55  lr: 0.003833  min_lr: 0.003833  loss: 3.5791 (3.7606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6466 (0.6406)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [56]  [2000/2502]  eta: 0:02:48  lr: 0.003832  min_lr: 0.003832  loss: 3.6705 (3.7575)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5652 (0.6383)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [56]  [2200/2502]  eta: 0:01:41  lr: 0.003831  min_lr: 0.003831  loss: 3.5927 (3.7610)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5951 (0.6351)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [56]  [2400/2502]  eta: 0:00:34  lr: 0.003831  min_lr: 0.003831  loss: 3.8318 (3.7609)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6166 (0.6386)  time: 0.3366  data: 0.0005  max mem: 27255
Epoch: [56]  [2501/2502]  eta: 0:00:00  lr: 0.003830  min_lr: 0.003830  loss: 3.7772 (3.7613)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5969 (0.6384)  time: 0.3000  data: 0.0009  max mem: 27255
Epoch: [56] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.003830  min_lr: 0.003830  loss: 3.7772 (3.7703)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5969 (0.6384)
Test:  [ 0/50]  eta: 0:02:11  loss: 0.6709 (0.6709)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 2.6335  data: 2.4402  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9985 (1.0336)  acc1: 82.0000 (80.4727)  acc5: 95.6000 (95.5273)  time: 0.4144  data: 0.2479  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.9985 (1.0445)  acc1: 78.0000 (79.5619)  acc5: 95.6000 (95.6381)  time: 0.2576  data: 0.0937  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2471 (1.2049)  acc1: 72.8000 (76.2065)  acc5: 93.2000 (93.6000)  time: 0.2548  data: 0.0910  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5690 (1.2913)  acc1: 68.0000 (74.0781)  acc5: 88.8000 (92.6244)  time: 0.1957  data: 0.0319  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5552 (1.3148)  acc1: 68.4000 (73.5760)  acc5: 88.8000 (92.4640)  time: 0.1847  data: 0.0211  max mem: 27255
Test: Total time: 0:00:13 (0.2657 s / it)
* Acc@1 73.452 Acc@5 92.334 loss 1.317
Accuracy of the model on the 50000 test images: 73.5%
Max accuracy: 73.45%
Epoch: [57]  [   0/2502]  eta: 1:17:14  lr: 0.003830  min_lr: 0.003830  loss: 4.2503 (4.2503)  weight_decay: 0.0500 (0.0500)  time: 1.8523  data: 1.4995  max mem: 27255
Epoch: [57]  [ 200/2502]  eta: 0:13:08  lr: 0.003829  min_lr: 0.003829  loss: 3.9406 (3.7525)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6549 (0.6577)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [57]  [ 400/2502]  eta: 0:11:51  lr: 0.003829  min_lr: 0.003829  loss: 4.0629 (3.7652)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6138 (0.6587)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [57]  [ 600/2502]  eta: 0:10:41  lr: 0.003828  min_lr: 0.003828  loss: 3.8268 (3.7575)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5773 (0.6596)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [57]  [ 800/2502]  eta: 0:09:33  lr: 0.003827  min_lr: 0.003827  loss: 3.8413 (3.7489)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5553 (0.6564)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [57]  [1000/2502]  eta: 0:08:25  lr: 0.003827  min_lr: 0.003827  loss: 3.8350 (3.7580)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6788 (0.6569)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [57]  [1200/2502]  eta: 0:07:17  lr: 0.003826  min_lr: 0.003826  loss: 3.6165 (3.7479)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5677 (0.6517)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [57]  [1400/2502]  eta: 0:06:09  lr: 0.003825  min_lr: 0.003825  loss: 3.7569 (3.7482)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5681 (0.6455)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [57]  [1600/2502]  eta: 0:05:02  lr: 0.003824  min_lr: 0.003824  loss: 4.0475 (3.7544)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5912 (0.6452)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [57]  [1800/2502]  eta: 0:03:55  lr: 0.003824  min_lr: 0.003824  loss: 4.0055 (3.7502)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5789 (0.6457)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [57]  [2000/2502]  eta: 0:02:48  lr: 0.003823  min_lr: 0.003823  loss: 3.9409 (3.7483)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6240 (0.6457)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [57]  [2200/2502]  eta: 0:01:41  lr: 0.003822  min_lr: 0.003822  loss: 3.7327 (3.7520)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5992 (0.6462)  time: 0.3365  data: 0.0004  max mem: 27255
Epoch: [57]  [2400/2502]  eta: 0:00:34  lr: 0.003821  min_lr: 0.003821  loss: 3.6790 (3.7493)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5900 (0.6421)  time: 0.3391  data: 0.0004  max mem: 27255
Epoch: [57]  [2501/2502]  eta: 0:00:00  lr: 0.003821  min_lr: 0.003821  loss: 4.0962 (3.7481)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5769 (0.6414)  time: 0.3004  data: 0.0007  max mem: 27255
Epoch: [57] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.003821  min_lr: 0.003821  loss: 4.0962 (3.7651)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5769 (0.6414)
Test:  [ 0/50]  eta: 0:02:46  loss: 0.8558 (0.8558)  acc1: 87.2000 (87.2000)  acc5: 97.6000 (97.6000)  time: 3.3364  data: 3.1466  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.1524 (1.1515)  acc1: 80.0000 (79.3818)  acc5: 96.0000 (95.4909)  time: 0.4558  data: 0.2897  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1979 (1.1909)  acc1: 78.0000 (78.4381)  acc5: 95.6000 (95.5048)  time: 0.1784  data: 0.0146  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3426 (1.3442)  acc1: 74.0000 (76.1419)  acc5: 92.8000 (93.3806)  time: 0.2074  data: 0.0437  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6979 (1.4385)  acc1: 68.8000 (73.8439)  acc5: 87.6000 (92.1756)  time: 0.2061  data: 0.0425  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6547 (1.4625)  acc1: 68.0000 (72.9840)  acc5: 88.4000 (91.9360)  time: 0.2058  data: 0.0423  max mem: 27255
Test: Total time: 0:00:12 (0.2519 s / it)
* Acc@1 72.828 Acc@5 91.930 loss 1.468
Accuracy of the model on the 50000 test images: 72.8%
Max accuracy: 73.45%
Epoch: [58]  [   0/2502]  eta: 1:35:37  lr: 0.003821  min_lr: 0.003821  loss: 3.4948 (3.4948)  weight_decay: 0.0500 (0.0500)  time: 2.2931  data: 1.5671  max mem: 27255
Epoch: [58]  [ 200/2502]  eta: 0:13:11  lr: 0.003820  min_lr: 0.003820  loss: 3.9785 (3.7093)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5860 (0.6345)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [58]  [ 400/2502]  eta: 0:11:52  lr: 0.003820  min_lr: 0.003820  loss: 3.8005 (3.7171)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6545 (0.6561)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [58]  [ 600/2502]  eta: 0:10:43  lr: 0.003819  min_lr: 0.003819  loss: 4.0507 (3.7374)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5863 (0.6436)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [58]  [ 800/2502]  eta: 0:09:34  lr: 0.003818  min_lr: 0.003818  loss: 3.7762 (3.7391)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5466 (0.6354)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [58]  [1000/2502]  eta: 0:08:26  lr: 0.003817  min_lr: 0.003817  loss: 4.0825 (3.7473)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6215 (0.6432)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [58]  [1200/2502]  eta: 0:07:18  lr: 0.003817  min_lr: 0.003817  loss: 3.8257 (3.7562)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5785 (0.6406)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [58]  [1400/2502]  eta: 0:06:10  lr: 0.003816  min_lr: 0.003816  loss: 3.9329 (3.7581)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6254 (0.6407)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [58]  [1600/2502]  eta: 0:05:03  lr: 0.003815  min_lr: 0.003815  loss: 3.9720 (3.7600)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5242 (0.6360)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [58]  [1800/2502]  eta: 0:03:55  lr: 0.003814  min_lr: 0.003814  loss: 3.9877 (3.7574)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6894 (0.6385)  time: 0.3389  data: 0.0006  max mem: 27255
Epoch: [58]  [2000/2502]  eta: 0:02:48  lr: 0.003814  min_lr: 0.003814  loss: 3.8832 (3.7590)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5456 (0.6400)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [58]  [2200/2502]  eta: 0:01:41  lr: 0.003813  min_lr: 0.003813  loss: 3.8982 (3.7581)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5992 (0.6379)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [58]  [2400/2502]  eta: 0:00:34  lr: 0.003812  min_lr: 0.003812  loss: 3.8964 (3.7592)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5920 (0.6358)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [58]  [2501/2502]  eta: 0:00:00  lr: 0.003812  min_lr: 0.003812  loss: 4.0107 (3.7589)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6566 (0.6369)  time: 0.3006  data: 0.0009  max mem: 27255
Epoch: [58] Total time: 0:13:59 (0.3356 s / it)
Averaged stats: lr: 0.003812  min_lr: 0.003812  loss: 4.0107 (3.7624)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6566 (0.6369)
Test:  [ 0/50]  eta: 0:02:11  loss: 0.7875 (0.7875)  acc1: 90.4000 (90.4000)  acc5: 97.6000 (97.6000)  time: 2.6237  data: 2.4256  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.0284 (1.0589)  acc1: 79.2000 (79.5273)  acc5: 96.0000 (95.1636)  time: 0.4417  data: 0.2746  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0350 (1.0674)  acc1: 77.6000 (78.7810)  acc5: 95.6000 (95.2952)  time: 0.2312  data: 0.0645  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2719 (1.2124)  acc1: 75.2000 (75.9742)  acc5: 92.0000 (93.4710)  time: 0.2197  data: 0.0531  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5638 (1.3005)  acc1: 68.4000 (73.8244)  acc5: 89.2000 (92.5463)  time: 0.1867  data: 0.0230  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5147 (1.3279)  acc1: 67.6000 (73.0320)  acc5: 89.6000 (92.2640)  time: 0.1683  data: 0.0047  max mem: 27255
Test: Total time: 0:00:12 (0.2510 s / it)
* Acc@1 72.848 Acc@5 92.110 loss 1.339
Accuracy of the model on the 50000 test images: 72.8%
Max accuracy: 73.45%
Epoch: [59]  [   0/2502]  eta: 1:34:23  lr: 0.003812  min_lr: 0.003812  loss: 3.3785 (3.3785)  weight_decay: 0.0500 (0.0500)  time: 2.2636  data: 1.3218  max mem: 27255
Epoch: [59]  [ 200/2502]  eta: 0:13:09  lr: 0.003811  min_lr: 0.003811  loss: 3.6800 (3.7784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6098 (0.6697)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [59]  [ 400/2502]  eta: 0:11:52  lr: 0.003810  min_lr: 0.003810  loss: 3.8240 (3.7638)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5767 (0.6516)  time: 0.3414  data: 0.0004  max mem: 27255
Epoch: [59]  [ 600/2502]  eta: 0:10:41  lr: 0.003809  min_lr: 0.003809  loss: 3.9085 (3.7543)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6728 (0.6649)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [59]  [ 800/2502]  eta: 0:09:32  lr: 0.003809  min_lr: 0.003809  loss: 3.8083 (3.7631)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7071 (0.6655)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [59]  [1000/2502]  eta: 0:08:24  lr: 0.003808  min_lr: 0.003808  loss: 3.5492 (3.7602)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5820 (0.6542)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [59]  [1200/2502]  eta: 0:07:16  lr: 0.003807  min_lr: 0.003807  loss: 3.8239 (3.7630)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6306 (0.6512)  time: 0.3368  data: 0.0004  max mem: 27255
Epoch: [59]  [1400/2502]  eta: 0:06:09  lr: 0.003806  min_lr: 0.003806  loss: 3.8239 (3.7583)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5779 (0.6526)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [59]  [1600/2502]  eta: 0:05:02  lr: 0.003805  min_lr: 0.003805  loss: 3.9916 (3.7555)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6264 (0.6502)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [59]  [1800/2502]  eta: 0:03:55  lr: 0.003805  min_lr: 0.003805  loss: 3.5429 (3.7482)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6779 (0.6515)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [59]  [2000/2502]  eta: 0:02:48  lr: 0.003804  min_lr: 0.003804  loss: 3.9354 (3.7550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6668 (0.6546)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [59]  [2200/2502]  eta: 0:01:41  lr: 0.003803  min_lr: 0.003803  loss: 3.8886 (3.7539)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5985 (0.6527)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [59]  [2400/2502]  eta: 0:00:34  lr: 0.003802  min_lr: 0.003802  loss: 3.6443 (3.7535)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5894 (0.6483)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [59]  [2501/2502]  eta: 0:00:00  lr: 0.003802  min_lr: 0.003802  loss: 3.9259 (3.7580)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6998 (0.6522)  time: 0.3004  data: 0.0008  max mem: 27255
Epoch: [59] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.003802  min_lr: 0.003802  loss: 3.9259 (3.7649)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6998 (0.6522)
Test:  [ 0/50]  eta: 0:02:43  loss: 0.9986 (0.9986)  acc1: 88.0000 (88.0000)  acc5: 97.2000 (97.2000)  time: 3.2690  data: 3.0693  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.2253 (1.2681)  acc1: 79.6000 (78.7636)  acc5: 96.0000 (95.3818)  time: 0.4746  data: 0.3075  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.2544 (1.2820)  acc1: 78.0000 (78.3048)  acc5: 96.0000 (95.4667)  time: 0.1988  data: 0.0349  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.4591 (1.4144)  acc1: 74.0000 (75.5871)  acc5: 91.2000 (93.3419)  time: 0.1834  data: 0.0195  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.7428 (1.4918)  acc1: 68.0000 (73.6683)  acc5: 87.6000 (92.1854)  time: 0.1698  data: 0.0058  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.7080 (1.5041)  acc1: 68.0000 (73.1040)  acc5: 88.0000 (91.8160)  time: 0.1873  data: 0.0235  max mem: 27255
Test: Total time: 0:00:12 (0.2512 s / it)
* Acc@1 73.100 Acc@5 92.028 loss 1.507
Accuracy of the model on the 50000 test images: 73.1%
Max accuracy: 73.45%
Epoch: [60]  [   0/2502]  eta: 1:26:28  lr: 0.003802  min_lr: 0.003802  loss: 4.3434 (4.3434)  weight_decay: 0.0500 (0.0500)  time: 2.0736  data: 1.4092  max mem: 27255
Epoch: [60]  [ 200/2502]  eta: 0:13:10  lr: 0.003801  min_lr: 0.003801  loss: 3.4538 (3.7318)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6287 (0.6791)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [60]  [ 400/2502]  eta: 0:11:56  lr: 0.003800  min_lr: 0.003800  loss: 3.9630 (3.7550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7790 (0.7091)  time: 0.3427  data: 0.0005  max mem: 27255
Epoch: [60]  [ 600/2502]  eta: 0:10:43  lr: 0.003800  min_lr: 0.003800  loss: 4.0131 (3.7723)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6082 (0.6787)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [60]  [ 800/2502]  eta: 0:09:34  lr: 0.003799  min_lr: 0.003799  loss: 4.0324 (3.7720)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6000 (0.6694)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [60]  [1000/2502]  eta: 0:08:25  lr: 0.003798  min_lr: 0.003798  loss: 3.8724 (3.7751)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5587 (0.6552)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [60]  [1200/2502]  eta: 0:07:17  lr: 0.003797  min_lr: 0.003797  loss: 3.7766 (3.7755)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5482 (0.6471)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [60]  [1400/2502]  eta: 0:06:10  lr: 0.003797  min_lr: 0.003797  loss: 4.0024 (3.7770)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5854 (0.6471)  time: 0.3358  data: 0.0005  max mem: 27255
Epoch: [60]  [1600/2502]  eta: 0:05:02  lr: 0.003796  min_lr: 0.003796  loss: 4.0639 (3.7643)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5806 (0.6400)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [60]  [1800/2502]  eta: 0:03:55  lr: 0.003795  min_lr: 0.003795  loss: 3.9347 (3.7656)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5999 (0.6452)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [60]  [2000/2502]  eta: 0:02:48  lr: 0.003794  min_lr: 0.003794  loss: 3.6944 (3.7670)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6743 (0.6460)  time: 0.3358  data: 0.0005  max mem: 27255
Epoch: [60]  [2200/2502]  eta: 0:01:41  lr: 0.003793  min_lr: 0.003793  loss: 3.5898 (3.7660)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6099 (0.6426)  time: 0.3350  data: 0.0003  max mem: 27255
Epoch: [60]  [2400/2502]  eta: 0:00:34  lr: 0.003793  min_lr: 0.003793  loss: 4.0715 (3.7671)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6476 (0.6455)  time: 0.3359  data: 0.0003  max mem: 27255
Epoch: [60]  [2501/2502]  eta: 0:00:00  lr: 0.003792  min_lr: 0.003792  loss: 3.6713 (3.7627)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6299 (0.6447)  time: 0.2998  data: 0.0007  max mem: 27255
Epoch: [60] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.003792  min_lr: 0.003792  loss: 3.6713 (3.7513)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6299 (0.6447)
Test:  [ 0/50]  eta: 0:03:07  loss: 0.7562 (0.7562)  acc1: 89.6000 (89.6000)  acc5: 98.4000 (98.4000)  time: 3.7426  data: 3.5417  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 1.0128 (1.0492)  acc1: 78.0000 (79.8182)  acc5: 95.2000 (95.2364)  time: 0.4953  data: 0.3276  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0128 (1.0595)  acc1: 77.6000 (78.8191)  acc5: 95.2000 (95.3524)  time: 0.1676  data: 0.0033  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2567 (1.1967)  acc1: 73.6000 (76.0516)  acc5: 92.0000 (93.3032)  time: 0.1981  data: 0.0342  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5918 (1.2918)  acc1: 68.0000 (73.9415)  acc5: 88.4000 (92.1561)  time: 0.2450  data: 0.0814  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5884 (1.3136)  acc1: 68.0000 (73.3840)  acc5: 88.8000 (91.9440)  time: 0.2154  data: 0.0520  max mem: 27255
Test: Total time: 0:00:13 (0.2728 s / it)
* Acc@1 73.434 Acc@5 92.148 loss 1.315
Accuracy of the model on the 50000 test images: 73.4%
Max accuracy: 73.45%
Epoch: [61]  [   0/2502]  eta: 1:30:11  lr: 0.003792  min_lr: 0.003792  loss: 3.8210 (3.8210)  weight_decay: 0.0500 (0.0500)  time: 2.1628  data: 1.8131  max mem: 27255
Epoch: [61]  [ 200/2502]  eta: 0:13:12  lr: 0.003791  min_lr: 0.003791  loss: 3.4888 (3.7289)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6095 (0.6257)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [61]  [ 400/2502]  eta: 0:11:52  lr: 0.003791  min_lr: 0.003791  loss: 3.8020 (3.7094)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5781 (0.6310)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [61]  [ 600/2502]  eta: 0:10:41  lr: 0.003790  min_lr: 0.003790  loss: 3.6866 (3.7022)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5756 (0.6278)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [61]  [ 800/2502]  eta: 0:09:32  lr: 0.003789  min_lr: 0.003789  loss: 3.8456 (3.7089)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6659 (inf)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [61]  [1000/2502]  eta: 0:08:24  lr: 0.003788  min_lr: 0.003788  loss: 3.6487 (3.7316)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5602 (inf)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [61]  [1200/2502]  eta: 0:07:16  lr: 0.003787  min_lr: 0.003787  loss: 3.8252 (3.7322)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7011 (inf)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [61]  [1400/2502]  eta: 0:06:09  lr: 0.003787  min_lr: 0.003787  loss: 3.5764 (3.7329)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5966 (inf)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [61]  [1600/2502]  eta: 0:05:02  lr: 0.003786  min_lr: 0.003786  loss: 3.9798 (3.7400)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6872 (inf)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [61]  [1800/2502]  eta: 0:03:55  lr: 0.003785  min_lr: 0.003785  loss: 3.9456 (3.7399)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6977 (inf)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [61]  [2000/2502]  eta: 0:02:48  lr: 0.003784  min_lr: 0.003784  loss: 4.0661 (3.7441)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6077 (inf)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [61]  [2200/2502]  eta: 0:01:41  lr: 0.003783  min_lr: 0.003783  loss: 3.7817 (3.7465)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6028 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [61]  [2400/2502]  eta: 0:00:34  lr: 0.003782  min_lr: 0.003782  loss: 3.8554 (3.7455)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6223 (inf)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [61]  [2501/2502]  eta: 0:00:00  lr: 0.003782  min_lr: 0.003782  loss: 3.8027 (3.7452)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5912 (inf)  time: 0.3014  data: 0.0007  max mem: 27255
Epoch: [61] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.003782  min_lr: 0.003782  loss: 3.8027 (3.7606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5912 (inf)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.7680 (0.7680)  acc1: 89.6000 (89.6000)  acc5: 98.4000 (98.4000)  time: 2.8047  data: 2.6112  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9913 (1.0082)  acc1: 79.6000 (79.6364)  acc5: 96.0000 (95.6000)  time: 0.4438  data: 0.2770  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0282 (1.0360)  acc1: 79.2000 (79.1238)  acc5: 95.6000 (95.6191)  time: 0.2213  data: 0.0566  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2101 (1.1876)  acc1: 73.6000 (76.0516)  acc5: 93.6000 (93.4323)  time: 0.2055  data: 0.0408  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5588 (1.2759)  acc1: 67.2000 (74.0293)  acc5: 88.4000 (92.3805)  time: 0.1928  data: 0.0290  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5200 (1.2890)  acc1: 69.6000 (73.5440)  acc5: 89.2000 (92.1840)  time: 0.1867  data: 0.0231  max mem: 27255
Test: Total time: 0:00:12 (0.2532 s / it)
* Acc@1 73.332 Acc@5 92.164 loss 1.297
Accuracy of the model on the 50000 test images: 73.3%
Max accuracy: 73.45%
Epoch: [62]  [   0/2502]  eta: 1:14:40  lr: 0.003782  min_lr: 0.003782  loss: 4.2741 (4.2741)  weight_decay: 0.0500 (0.0500)  time: 1.7906  data: 1.4550  max mem: 27255
Epoch: [62]  [ 200/2502]  eta: 0:13:06  lr: 0.003781  min_lr: 0.003781  loss: 3.9552 (3.7635)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6287 (0.6079)  time: 0.3360  data: 0.0004  max mem: 27255
Epoch: [62]  [ 400/2502]  eta: 0:11:50  lr: 0.003780  min_lr: 0.003780  loss: 3.6377 (3.7500)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6016 (0.6170)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [62]  [ 600/2502]  eta: 0:10:40  lr: 0.003780  min_lr: 0.003780  loss: 3.8783 (3.7598)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6692 (0.6520)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [62]  [ 800/2502]  eta: 0:09:32  lr: 0.003779  min_lr: 0.003779  loss: 3.7610 (3.7594)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6065 (0.6475)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [62]  [1000/2502]  eta: 0:08:24  lr: 0.003778  min_lr: 0.003778  loss: 4.0270 (3.7528)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6538 (0.6536)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [62]  [1200/2502]  eta: 0:07:16  lr: 0.003777  min_lr: 0.003777  loss: 3.9688 (3.7587)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5872 (0.6522)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [62]  [1400/2502]  eta: 0:06:09  lr: 0.003776  min_lr: 0.003776  loss: 3.7846 (3.7605)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6053 (0.6486)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [62]  [1600/2502]  eta: 0:05:02  lr: 0.003776  min_lr: 0.003776  loss: 3.8457 (3.7617)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5551 (0.6507)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [62]  [1800/2502]  eta: 0:03:55  lr: 0.003775  min_lr: 0.003775  loss: 3.6308 (3.7598)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6101 (0.6493)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [62]  [2000/2502]  eta: 0:02:48  lr: 0.003774  min_lr: 0.003774  loss: 3.8622 (3.7561)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5786 (0.6473)  time: 0.3438  data: 0.0005  max mem: 27255
Epoch: [62]  [2200/2502]  eta: 0:01:41  lr: 0.003773  min_lr: 0.003773  loss: 3.6511 (3.7495)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6068 (0.6463)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [62]  [2400/2502]  eta: 0:00:34  lr: 0.003772  min_lr: 0.003772  loss: 3.8760 (3.7530)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5939 (0.6422)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [62]  [2501/2502]  eta: 0:00:00  lr: 0.003772  min_lr: 0.003772  loss: 4.0555 (3.7511)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6339 (0.6429)  time: 0.2998  data: 0.0009  max mem: 27255
Epoch: [62] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.003772  min_lr: 0.003772  loss: 4.0555 (3.7405)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6339 (0.6429)
Test:  [ 0/50]  eta: 0:02:41  loss: 0.7685 (0.7685)  acc1: 90.0000 (90.0000)  acc5: 97.6000 (97.6000)  time: 3.2319  data: 3.0333  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.1286 (1.1650)  acc1: 79.6000 (79.3091)  acc5: 95.6000 (95.4545)  time: 0.4485  data: 0.2814  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.2108 (1.2056)  acc1: 77.6000 (78.4571)  acc5: 95.6000 (95.3333)  time: 0.1821  data: 0.0181  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.4831 (1.3380)  acc1: 70.8000 (75.4065)  acc5: 92.0000 (93.5355)  time: 0.2076  data: 0.0436  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6698 (1.4192)  acc1: 67.6000 (73.5707)  acc5: 87.6000 (92.2146)  time: 0.2042  data: 0.0403  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6698 (1.4370)  acc1: 67.6000 (72.9040)  acc5: 88.4000 (91.9520)  time: 0.1824  data: 0.0173  max mem: 27255
Test: Total time: 0:00:12 (0.2537 s / it)
* Acc@1 72.912 Acc@5 92.030 loss 1.444
Accuracy of the model on the 50000 test images: 72.9%
Max accuracy: 73.45%
Epoch: [63]  [   0/2502]  eta: 1:35:33  lr: 0.003772  min_lr: 0.003772  loss: 3.9796 (3.9796)  weight_decay: 0.0500 (0.0500)  time: 2.2916  data: 1.4642  max mem: 27255
Epoch: [63]  [ 200/2502]  eta: 0:13:18  lr: 0.003771  min_lr: 0.003771  loss: 3.7517 (3.7806)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5853 (0.6371)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [63]  [ 400/2502]  eta: 0:11:55  lr: 0.003770  min_lr: 0.003770  loss: 4.1166 (3.7696)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6375 (0.6422)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [63]  [ 600/2502]  eta: 0:10:43  lr: 0.003769  min_lr: 0.003769  loss: 4.0849 (3.7611)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6595 (0.6513)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [63]  [ 800/2502]  eta: 0:09:34  lr: 0.003768  min_lr: 0.003768  loss: 3.8338 (3.7473)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6061 (0.6435)  time: 0.3415  data: 0.0004  max mem: 27255
Epoch: [63]  [1000/2502]  eta: 0:08:25  lr: 0.003768  min_lr: 0.003768  loss: 3.7913 (3.7514)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5853 (0.6340)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [63]  [1200/2502]  eta: 0:07:17  lr: 0.003767  min_lr: 0.003767  loss: 3.5862 (3.7448)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5705 (0.6345)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [63]  [1400/2502]  eta: 0:06:10  lr: 0.003766  min_lr: 0.003766  loss: 3.8461 (3.7470)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6244 (0.6366)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [63]  [1600/2502]  eta: 0:05:02  lr: 0.003765  min_lr: 0.003765  loss: 3.7219 (3.7453)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6306 (0.6345)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [63]  [1800/2502]  eta: 0:03:55  lr: 0.003764  min_lr: 0.003764  loss: 3.9050 (3.7462)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6698 (0.6391)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [63]  [2000/2502]  eta: 0:02:48  lr: 0.003763  min_lr: 0.003763  loss: 3.7757 (3.7407)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6801 (0.6439)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [63]  [2200/2502]  eta: 0:01:41  lr: 0.003763  min_lr: 0.003763  loss: 3.7264 (3.7390)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5747 (0.6414)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [63]  [2400/2502]  eta: 0:00:34  lr: 0.003762  min_lr: 0.003762  loss: 3.9264 (3.7378)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6582 (0.6453)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [63]  [2501/2502]  eta: 0:00:00  lr: 0.003761  min_lr: 0.003761  loss: 3.5521 (3.7359)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6644 (0.6469)  time: 0.3001  data: 0.0008  max mem: 27255
Epoch: [63] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003761  min_lr: 0.003761  loss: 3.5521 (3.7295)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6644 (0.6469)
Test:  [ 0/50]  eta: 0:02:10  loss: 0.7075 (0.7075)  acc1: 91.2000 (91.2000)  acc5: 98.8000 (98.8000)  time: 2.6053  data: 2.4104  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.1271 (1.0944)  acc1: 78.4000 (80.1818)  acc5: 94.8000 (95.4909)  time: 0.4469  data: 0.2803  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1345 (1.1230)  acc1: 77.2000 (78.7810)  acc5: 95.6000 (95.4095)  time: 0.2051  data: 0.0413  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3461 (1.2526)  acc1: 72.4000 (76.0129)  acc5: 91.2000 (93.6129)  time: 0.1802  data: 0.0164  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6000 (1.3340)  acc1: 68.8000 (74.1073)  acc5: 88.8000 (92.4781)  time: 0.1830  data: 0.0159  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5336 (1.3480)  acc1: 68.8000 (73.5600)  acc5: 89.6000 (92.4080)  time: 0.1743  data: 0.0073  max mem: 27255
Test: Total time: 0:00:11 (0.2387 s / it)
* Acc@1 73.278 Acc@5 92.148 loss 1.366
Accuracy of the model on the 50000 test images: 73.3%
Max accuracy: 73.45%
Epoch: [64]  [   0/2502]  eta: 1:53:06  lr: 0.003761  min_lr: 0.003761  loss: 4.5202 (4.5202)  weight_decay: 0.0500 (0.0500)  time: 2.7125  data: 1.6758  max mem: 27255
Epoch: [64]  [ 200/2502]  eta: 0:13:18  lr: 0.003760  min_lr: 0.003760  loss: 3.9388 (3.7245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5863 (0.6103)  time: 0.3368  data: 0.0005  max mem: 27255
Epoch: [64]  [ 400/2502]  eta: 0:11:56  lr: 0.003760  min_lr: 0.003760  loss: 3.9218 (3.7371)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6654 (0.6238)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [64]  [ 600/2502]  eta: 0:10:44  lr: 0.003759  min_lr: 0.003759  loss: 3.9989 (3.7299)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5730 (0.6168)  time: 0.3408  data: 0.0004  max mem: 27255
Epoch: [64]  [ 800/2502]  eta: 0:09:35  lr: 0.003758  min_lr: 0.003758  loss: 3.7611 (3.7195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6153 (0.6216)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [64]  [1000/2502]  eta: 0:08:26  lr: 0.003757  min_lr: 0.003757  loss: 3.9436 (3.7282)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6192 (0.6292)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [64]  [1200/2502]  eta: 0:07:18  lr: 0.003756  min_lr: 0.003756  loss: 3.8736 (3.7269)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6295 (0.6333)  time: 0.3379  data: 0.0004  max mem: 27255
Epoch: [64]  [1400/2502]  eta: 0:06:11  lr: 0.003755  min_lr: 0.003755  loss: 3.8647 (3.7276)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5598 (inf)  time: 0.3391  data: 0.0004  max mem: 27255
Epoch: [64]  [1600/2502]  eta: 0:05:03  lr: 0.003754  min_lr: 0.003754  loss: 3.8752 (3.7249)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6372 (inf)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [64]  [1800/2502]  eta: 0:03:56  lr: 0.003754  min_lr: 0.003754  loss: 3.6597 (3.7297)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6060 (inf)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [64]  [2000/2502]  eta: 0:02:48  lr: 0.003753  min_lr: 0.003753  loss: 3.7922 (3.7326)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6345 (inf)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [64]  [2200/2502]  eta: 0:01:41  lr: 0.003752  min_lr: 0.003752  loss: 3.7301 (3.7322)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5767 (inf)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [64]  [2400/2502]  eta: 0:00:34  lr: 0.003751  min_lr: 0.003751  loss: 4.1636 (3.7399)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6732 (inf)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [64]  [2501/2502]  eta: 0:00:00  lr: 0.003751  min_lr: 0.003751  loss: 3.6673 (3.7410)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6142 (inf)  time: 0.3008  data: 0.0009  max mem: 27255
Epoch: [64] Total time: 0:14:00 (0.3358 s / it)
Averaged stats: lr: 0.003751  min_lr: 0.003751  loss: 3.6673 (3.7397)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6142 (inf)
Test:  [ 0/50]  eta: 0:02:26  loss: 0.7074 (0.7074)  acc1: 91.2000 (91.2000)  acc5: 99.2000 (99.2000)  time: 2.9314  data: 2.7355  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.0227 (1.1025)  acc1: 80.8000 (80.2545)  acc5: 96.0000 (95.8182)  time: 0.4626  data: 0.2958  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1258 (1.1418)  acc1: 76.4000 (78.7619)  acc5: 96.0000 (95.7333)  time: 0.2078  data: 0.0439  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.3514 (1.2739)  acc1: 74.4000 (76.1161)  acc5: 91.6000 (93.7936)  time: 0.2146  data: 0.0507  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6130 (1.3537)  acc1: 68.4000 (74.3122)  acc5: 89.2000 (92.6342)  time: 0.1966  data: 0.0328  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6130 (1.3710)  acc1: 68.4000 (73.6640)  acc5: 89.2000 (92.4640)  time: 0.1897  data: 0.0240  max mem: 27255
Test: Total time: 0:00:12 (0.2526 s / it)
* Acc@1 73.730 Acc@5 92.364 loss 1.373
Accuracy of the model on the 50000 test images: 73.7%
Max accuracy: 73.73%
Epoch: [65]  [   0/2502]  eta: 1:10:22  lr: 0.003751  min_lr: 0.003751  loss: 2.9450 (2.9450)  weight_decay: 0.0500 (0.0500)  time: 1.6876  data: 1.3478  max mem: 27255
Epoch: [65]  [ 200/2502]  eta: 0:13:06  lr: 0.003750  min_lr: 0.003750  loss: 3.8554 (3.7463)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6858 (0.6784)  time: 0.3343  data: 0.0003  max mem: 27255
Epoch: [65]  [ 400/2502]  eta: 0:11:50  lr: 0.003749  min_lr: 0.003749  loss: 3.7518 (3.7510)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6672 (0.6761)  time: 0.3332  data: 0.0003  max mem: 27255
Epoch: [65]  [ 600/2502]  eta: 0:10:40  lr: 0.003748  min_lr: 0.003748  loss: 4.0924 (3.7593)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6119 (0.6666)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [65]  [ 800/2502]  eta: 0:09:32  lr: 0.003747  min_lr: 0.003747  loss: 3.5886 (3.7512)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5613 (0.6506)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [65]  [1000/2502]  eta: 0:08:24  lr: 0.003746  min_lr: 0.003746  loss: 3.9814 (3.7558)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5945 (0.6437)  time: 0.3441  data: 0.0004  max mem: 27255
Epoch: [65]  [1200/2502]  eta: 0:07:17  lr: 0.003745  min_lr: 0.003745  loss: 3.6952 (3.7589)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5975 (0.6420)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [65]  [1400/2502]  eta: 0:06:09  lr: 0.003744  min_lr: 0.003744  loss: 4.0576 (3.7647)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5928 (0.6410)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [65]  [1600/2502]  eta: 0:05:02  lr: 0.003744  min_lr: 0.003744  loss: 3.6679 (3.7562)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6811 (0.6414)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [65]  [1800/2502]  eta: 0:03:55  lr: 0.003743  min_lr: 0.003743  loss: 3.5326 (3.7550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5932 (0.6375)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [65]  [2000/2502]  eta: 0:02:48  lr: 0.003742  min_lr: 0.003742  loss: 3.9139 (3.7496)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6079 (0.6349)  time: 0.3366  data: 0.0003  max mem: 27255
Epoch: [65]  [2200/2502]  eta: 0:01:41  lr: 0.003741  min_lr: 0.003741  loss: 3.9001 (3.7474)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6592 (0.6359)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [65]  [2400/2502]  eta: 0:00:34  lr: 0.003740  min_lr: 0.003740  loss: 3.7481 (3.7454)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5656 (0.6369)  time: 0.3330  data: 0.0003  max mem: 27255
Epoch: [65]  [2501/2502]  eta: 0:00:00  lr: 0.003740  min_lr: 0.003740  loss: 3.9998 (3.7498)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5361 (0.6352)  time: 0.3003  data: 0.0007  max mem: 27255
Epoch: [65] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003740  min_lr: 0.003740  loss: 3.9998 (3.7327)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5361 (0.6352)
Test:  [ 0/50]  eta: 0:02:13  loss: 0.8471 (0.8471)  acc1: 90.4000 (90.4000)  acc5: 97.6000 (97.6000)  time: 2.6678  data: 2.4643  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.1418 (1.1547)  acc1: 78.4000 (78.8727)  acc5: 96.0000 (95.8182)  time: 0.4444  data: 0.2741  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1838 (1.1671)  acc1: 76.4000 (78.0952)  acc5: 96.0000 (95.7333)  time: 0.2228  data: 0.0574  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.3239 (1.2904)  acc1: 72.0000 (75.4323)  acc5: 92.4000 (93.8065)  time: 0.2293  data: 0.0653  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6207 (1.3710)  acc1: 68.4000 (73.6000)  acc5: 88.0000 (92.5659)  time: 0.2198  data: 0.0522  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6321 (1.3922)  acc1: 68.8000 (72.9200)  acc5: 88.8000 (92.2800)  time: 0.1914  data: 0.0234  max mem: 27255
Test: Total time: 0:00:13 (0.2646 s / it)
* Acc@1 72.982 Acc@5 92.100 loss 1.399
Accuracy of the model on the 50000 test images: 73.0%
Max accuracy: 73.73%
Epoch: [66]  [   0/2502]  eta: 1:32:04  lr: 0.003740  min_lr: 0.003740  loss: 4.4858 (4.4858)  weight_decay: 0.0500 (0.0500)  time: 2.2079  data: 1.5576  max mem: 27255
Epoch: [66]  [ 200/2502]  eta: 0:13:12  lr: 0.003739  min_lr: 0.003739  loss: 3.7820 (3.6899)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6637 (0.6613)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [66]  [ 400/2502]  eta: 0:11:53  lr: 0.003738  min_lr: 0.003738  loss: 3.7463 (3.6954)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6009 (0.6429)  time: 0.3420  data: 0.0004  max mem: 27255
Epoch: [66]  [ 600/2502]  eta: 0:10:42  lr: 0.003737  min_lr: 0.003737  loss: 4.0194 (3.7136)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6578 (0.6521)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [66]  [ 800/2502]  eta: 0:09:34  lr: 0.003736  min_lr: 0.003736  loss: 3.6253 (3.7174)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6390 (0.6447)  time: 0.3418  data: 0.0004  max mem: 27255
Epoch: [66]  [1000/2502]  eta: 0:08:25  lr: 0.003735  min_lr: 0.003735  loss: 3.8485 (3.7100)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5780 (0.6429)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [66]  [1200/2502]  eta: 0:07:17  lr: 0.003734  min_lr: 0.003734  loss: 4.1661 (3.7181)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5832 (0.6439)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [66]  [1400/2502]  eta: 0:06:10  lr: 0.003733  min_lr: 0.003733  loss: 3.7755 (3.7195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6625 (0.6499)  time: 0.3346  data: 0.0003  max mem: 27255
Epoch: [66]  [1600/2502]  eta: 0:05:02  lr: 0.003732  min_lr: 0.003732  loss: 3.9410 (3.7264)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5951 (0.6474)  time: 0.3331  data: 0.0003  max mem: 27255
Epoch: [66]  [1800/2502]  eta: 0:03:55  lr: 0.003732  min_lr: 0.003732  loss: 3.7601 (3.7248)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5856 (0.6463)  time: 0.3357  data: 0.0003  max mem: 27255
Epoch: [66]  [2000/2502]  eta: 0:02:48  lr: 0.003731  min_lr: 0.003731  loss: 3.5402 (3.7267)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6020 (0.6457)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [66]  [2200/2502]  eta: 0:01:41  lr: 0.003730  min_lr: 0.003730  loss: 4.0136 (3.7236)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6194 (0.6465)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [66]  [2400/2502]  eta: 0:00:34  lr: 0.003729  min_lr: 0.003729  loss: 3.8903 (3.7210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6385 (0.6448)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [66]  [2501/2502]  eta: 0:00:00  lr: 0.003728  min_lr: 0.003728  loss: 3.7208 (3.7219)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5930 (0.6434)  time: 0.3005  data: 0.0006  max mem: 27255
Epoch: [66] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.003728  min_lr: 0.003728  loss: 3.7208 (3.7226)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5930 (0.6434)
Test:  [ 0/50]  eta: 0:02:17  loss: 0.6938 (0.6938)  acc1: 88.8000 (88.8000)  acc5: 98.0000 (98.0000)  time: 2.7477  data: 2.5467  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.0438 (1.0395)  acc1: 79.2000 (80.8364)  acc5: 95.6000 (95.4182)  time: 0.4424  data: 0.2745  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0432 (1.0468)  acc1: 78.4000 (79.9810)  acc5: 95.6000 (95.5429)  time: 0.2170  data: 0.0527  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2657 (1.1892)  acc1: 74.0000 (76.6323)  acc5: 91.6000 (93.5742)  time: 0.2183  data: 0.0538  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5382 (1.2799)  acc1: 68.4000 (74.3024)  acc5: 88.8000 (92.4000)  time: 0.1995  data: 0.0340  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4906 (1.2974)  acc1: 68.4000 (73.7600)  acc5: 88.8000 (92.3040)  time: 0.1945  data: 0.0296  max mem: 27255
Test: Total time: 0:00:12 (0.2527 s / it)
* Acc@1 73.694 Acc@5 92.368 loss 1.301
Accuracy of the model on the 50000 test images: 73.7%
Max accuracy: 73.73%
Epoch: [67]  [   0/2502]  eta: 1:28:53  lr: 0.003728  min_lr: 0.003728  loss: 4.2892 (4.2892)  weight_decay: 0.0500 (0.0500)  time: 2.1315  data: 1.3286  max mem: 27255
Epoch: [67]  [ 200/2502]  eta: 0:13:11  lr: 0.003727  min_lr: 0.003727  loss: 3.9493 (3.7281)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6536 (0.6367)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [67]  [ 400/2502]  eta: 0:11:52  lr: 0.003727  min_lr: 0.003727  loss: 3.9889 (3.7081)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5790 (0.6249)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [67]  [ 600/2502]  eta: 0:10:41  lr: 0.003726  min_lr: 0.003726  loss: 3.8258 (3.7051)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6057 (0.6336)  time: 0.3404  data: 0.0004  max mem: 27255
Epoch: [67]  [ 800/2502]  eta: 0:09:32  lr: 0.003725  min_lr: 0.003725  loss: 3.9249 (3.7249)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5914 (0.6257)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [67]  [1000/2502]  eta: 0:08:25  lr: 0.003724  min_lr: 0.003724  loss: 3.6171 (3.7205)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6689 (0.6387)  time: 0.3421  data: 0.0003  max mem: 27255
Epoch: [67]  [1200/2502]  eta: 0:07:17  lr: 0.003723  min_lr: 0.003723  loss: 3.9362 (3.7192)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6103 (0.6415)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [67]  [1400/2502]  eta: 0:06:09  lr: 0.003722  min_lr: 0.003722  loss: 3.8270 (3.7216)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6081 (0.6402)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [67]  [1600/2502]  eta: 0:05:02  lr: 0.003721  min_lr: 0.003721  loss: 3.9156 (3.7353)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6186 (0.6420)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [67]  [1800/2502]  eta: 0:03:55  lr: 0.003720  min_lr: 0.003720  loss: 3.6205 (3.7346)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6876 (0.6444)  time: 0.3353  data: 0.0003  max mem: 27255
Epoch: [67]  [2000/2502]  eta: 0:02:48  lr: 0.003719  min_lr: 0.003719  loss: 4.0572 (3.7445)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6262 (0.6422)  time: 0.3344  data: 0.0003  max mem: 27255
Epoch: [67]  [2200/2502]  eta: 0:01:41  lr: 0.003718  min_lr: 0.003718  loss: 3.8432 (3.7453)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6026 (inf)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [67]  [2400/2502]  eta: 0:00:34  lr: 0.003717  min_lr: 0.003717  loss: 3.5547 (3.7419)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6488 (inf)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [67]  [2501/2502]  eta: 0:00:00  lr: 0.003717  min_lr: 0.003717  loss: 3.7784 (3.7413)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6742 (inf)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [67] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003717  min_lr: 0.003717  loss: 3.7784 (3.7214)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6742 (inf)
Test:  [ 0/50]  eta: 0:02:14  loss: 0.8440 (0.8440)  acc1: 90.0000 (90.0000)  acc5: 98.0000 (98.0000)  time: 2.6953  data: 2.5087  max mem: 27255
Test:  [10/50]  eta: 0:00:15  loss: 1.0431 (1.1099)  acc1: 80.4000 (80.2545)  acc5: 95.2000 (95.4182)  time: 0.3992  data: 0.2318  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1286 (1.1204)  acc1: 78.0000 (79.3333)  acc5: 96.0000 (95.5810)  time: 0.1829  data: 0.0182  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3226 (1.2523)  acc1: 73.2000 (76.6581)  acc5: 92.8000 (93.6774)  time: 0.2264  data: 0.0621  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5974 (1.3378)  acc1: 68.8000 (74.4488)  acc5: 88.4000 (92.5268)  time: 0.2295  data: 0.0643  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5881 (1.3578)  acc1: 68.4000 (73.9200)  acc5: 88.4000 (92.2400)  time: 0.1855  data: 0.0184  max mem: 27255
Test: Total time: 0:00:12 (0.2510 s / it)
* Acc@1 73.812 Acc@5 92.440 loss 1.355
Accuracy of the model on the 50000 test images: 73.8%
Max accuracy: 73.81%
Epoch: [68]  [   0/2502]  eta: 1:28:56  lr: 0.003717  min_lr: 0.003717  loss: 3.9790 (3.9790)  weight_decay: 0.0500 (0.0500)  time: 2.1330  data: 1.7939  max mem: 27255
Epoch: [68]  [ 200/2502]  eta: 0:13:11  lr: 0.003716  min_lr: 0.003716  loss: 3.7119 (3.6752)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7055 (0.6553)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [68]  [ 400/2502]  eta: 0:11:54  lr: 0.003715  min_lr: 0.003715  loss: 3.8866 (3.7243)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5881 (0.6449)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [68]  [ 600/2502]  eta: 0:10:42  lr: 0.003714  min_lr: 0.003714  loss: 3.7944 (3.7186)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6251 (0.6422)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [68]  [ 800/2502]  eta: 0:09:33  lr: 0.003713  min_lr: 0.003713  loss: 3.7036 (3.7270)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5710 (0.6375)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [68]  [1000/2502]  eta: 0:08:25  lr: 0.003712  min_lr: 0.003712  loss: 3.6549 (3.7326)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7159 (0.6423)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [68]  [1200/2502]  eta: 0:07:17  lr: 0.003711  min_lr: 0.003711  loss: 3.7629 (3.7237)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5594 (0.6444)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [68]  [1400/2502]  eta: 0:06:10  lr: 0.003710  min_lr: 0.003710  loss: 4.0859 (3.7272)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6304 (0.6435)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [68]  [1600/2502]  eta: 0:05:02  lr: 0.003710  min_lr: 0.003710  loss: 3.8818 (3.7300)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6169 (0.6447)  time: 0.3345  data: 0.0006  max mem: 27255
Epoch: [68]  [1800/2502]  eta: 0:03:55  lr: 0.003709  min_lr: 0.003709  loss: 3.7477 (3.7244)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7050 (0.6509)  time: 0.3329  data: 0.0005  max mem: 27255
Epoch: [68]  [2000/2502]  eta: 0:02:48  lr: 0.003708  min_lr: 0.003708  loss: 4.0066 (3.7212)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5884 (0.6492)  time: 0.3374  data: 0.0006  max mem: 27255
Epoch: [68]  [2200/2502]  eta: 0:01:41  lr: 0.003707  min_lr: 0.003707  loss: 3.9942 (3.7182)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6295 (0.6506)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [68]  [2400/2502]  eta: 0:00:34  lr: 0.003706  min_lr: 0.003706  loss: 3.7359 (3.7211)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6540 (0.6508)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [68]  [2501/2502]  eta: 0:00:00  lr: 0.003705  min_lr: 0.003705  loss: 3.9350 (3.7245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6398 (0.6519)  time: 0.3006  data: 0.0009  max mem: 27255
Epoch: [68] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003705  min_lr: 0.003705  loss: 3.9350 (3.7161)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6398 (0.6519)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.7742 (0.7742)  acc1: 90.0000 (90.0000)  acc5: 98.4000 (98.4000)  time: 2.6573  data: 2.4598  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 1.1505 (1.1386)  acc1: 80.8000 (80.2909)  acc5: 96.0000 (95.9636)  time: 0.4797  data: 0.3120  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 1.1521 (1.1604)  acc1: 78.4000 (79.2762)  acc5: 96.0000 (95.7524)  time: 0.2582  data: 0.0892  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.4082 (1.2822)  acc1: 73.2000 (76.5032)  acc5: 91.6000 (93.8710)  time: 0.2197  data: 0.0511  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5698 (1.3528)  acc1: 68.4000 (74.4098)  acc5: 89.2000 (92.7902)  time: 0.1923  data: 0.0285  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5484 (1.3677)  acc1: 68.4000 (73.8000)  acc5: 89.2000 (92.5920)  time: 0.1816  data: 0.0180  max mem: 27255
Test: Total time: 0:00:13 (0.2646 s / it)
* Acc@1 73.882 Acc@5 92.434 loss 1.369
Accuracy of the model on the 50000 test images: 73.9%
Max accuracy: 73.88%
Epoch: [69]  [   0/2502]  eta: 1:12:59  lr: 0.003705  min_lr: 0.003705  loss: 3.9252 (3.9252)  weight_decay: 0.0500 (0.0500)  time: 1.7506  data: 1.4125  max mem: 27255
Epoch: [69]  [ 200/2502]  eta: 0:13:05  lr: 0.003704  min_lr: 0.003704  loss: 4.0554 (3.6934)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6037 (0.6672)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [69]  [ 400/2502]  eta: 0:11:50  lr: 0.003703  min_lr: 0.003703  loss: 3.9635 (3.7331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6036 (0.6464)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [69]  [ 600/2502]  eta: 0:10:40  lr: 0.003703  min_lr: 0.003703  loss: 3.5715 (3.6937)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6377 (0.6468)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [69]  [ 800/2502]  eta: 0:09:32  lr: 0.003702  min_lr: 0.003702  loss: 3.7062 (3.6823)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5785 (0.6505)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [69]  [1000/2502]  eta: 0:08:25  lr: 0.003701  min_lr: 0.003701  loss: 3.7587 (3.6926)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5674 (0.6420)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [69]  [1200/2502]  eta: 0:07:17  lr: 0.003700  min_lr: 0.003700  loss: 3.7148 (3.7090)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6648 (0.6499)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [69]  [1400/2502]  eta: 0:06:10  lr: 0.003699  min_lr: 0.003699  loss: 3.8929 (3.7151)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6352 (0.6517)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [69]  [1600/2502]  eta: 0:05:02  lr: 0.003698  min_lr: 0.003698  loss: 4.0127 (3.7195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5858 (0.6526)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [69]  [1800/2502]  eta: 0:03:55  lr: 0.003697  min_lr: 0.003697  loss: 3.8235 (3.7197)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6136 (0.6509)  time: 0.3353  data: 0.0005  max mem: 27255
Epoch: [69]  [2000/2502]  eta: 0:02:48  lr: 0.003696  min_lr: 0.003696  loss: 4.1074 (3.7208)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5627 (0.6509)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [69]  [2200/2502]  eta: 0:01:41  lr: 0.003695  min_lr: 0.003695  loss: 3.9779 (3.7206)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5988 (0.6505)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [69]  [2400/2502]  eta: 0:00:34  lr: 0.003694  min_lr: 0.003694  loss: 3.6164 (3.7155)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6182 (0.6486)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [69]  [2501/2502]  eta: 0:00:00  lr: 0.003694  min_lr: 0.003694  loss: 3.7568 (3.7145)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5616 (0.6461)  time: 0.3004  data: 0.0008  max mem: 27255
Epoch: [69] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.003694  min_lr: 0.003694  loss: 3.7568 (3.7162)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5616 (0.6461)
Test:  [ 0/50]  eta: 0:02:48  loss: 0.7470 (0.7470)  acc1: 88.8000 (88.8000)  acc5: 97.6000 (97.6000)  time: 3.3765  data: 3.1769  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.0496 (1.0727)  acc1: 79.2000 (79.7818)  acc5: 96.0000 (95.4909)  time: 0.4706  data: 0.3033  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0608 (1.0836)  acc1: 77.6000 (78.6476)  acc5: 96.0000 (95.5619)  time: 0.1927  data: 0.0289  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2920 (1.2194)  acc1: 73.6000 (76.2194)  acc5: 92.0000 (93.6645)  time: 0.2197  data: 0.0560  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5344 (1.2959)  acc1: 69.2000 (74.3902)  acc5: 88.8000 (92.5073)  time: 0.2284  data: 0.0648  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5344 (1.3184)  acc1: 68.4000 (73.6720)  acc5: 88.8000 (92.3920)  time: 0.1937  data: 0.0298  max mem: 27255
Test: Total time: 0:00:13 (0.2674 s / it)
* Acc@1 73.816 Acc@5 92.368 loss 1.319
Accuracy of the model on the 50000 test images: 73.8%
Max accuracy: 73.88%
Epoch: [70]  [   0/2502]  eta: 1:19:32  lr: 0.003694  min_lr: 0.003694  loss: 4.0375 (4.0375)  weight_decay: 0.0500 (0.0500)  time: 1.9076  data: 1.5624  max mem: 27255
Epoch: [70]  [ 200/2502]  eta: 0:13:06  lr: 0.003693  min_lr: 0.003693  loss: 3.7976 (3.7245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6212 (0.6635)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [70]  [ 400/2502]  eta: 0:11:51  lr: 0.003692  min_lr: 0.003692  loss: 3.6784 (3.7175)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6871 (0.6720)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [70]  [ 600/2502]  eta: 0:10:41  lr: 0.003691  min_lr: 0.003691  loss: 3.6865 (3.7220)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6174 (0.6666)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [70]  [ 800/2502]  eta: 0:09:33  lr: 0.003690  min_lr: 0.003690  loss: 3.9445 (3.7331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5834 (0.6613)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [70]  [1000/2502]  eta: 0:08:24  lr: 0.003689  min_lr: 0.003689  loss: 3.6922 (3.7244)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6083 (0.6620)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [70]  [1200/2502]  eta: 0:07:17  lr: 0.003688  min_lr: 0.003688  loss: 3.8048 (3.7174)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6081 (0.6609)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [70]  [1400/2502]  eta: 0:06:09  lr: 0.003687  min_lr: 0.003687  loss: 3.6833 (3.7261)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5890 (0.6630)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [70]  [1600/2502]  eta: 0:05:02  lr: 0.003686  min_lr: 0.003686  loss: 3.9631 (3.7131)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [70]  [1800/2502]  eta: 0:03:55  lr: 0.003685  min_lr: 0.003685  loss: 3.7481 (3.7083)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5803 (nan)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [70]  [2000/2502]  eta: 0:02:48  lr: 0.003684  min_lr: 0.003684  loss: 3.8516 (3.7059)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6475 (nan)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [70]  [2200/2502]  eta: 0:01:41  lr: 0.003683  min_lr: 0.003683  loss: 3.5448 (3.7063)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6432 (nan)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [70]  [2400/2502]  eta: 0:00:34  lr: 0.003682  min_lr: 0.003682  loss: 3.9283 (3.7129)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5949 (nan)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [70]  [2501/2502]  eta: 0:00:00  lr: 0.003682  min_lr: 0.003682  loss: 3.6052 (3.7170)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5901 (nan)  time: 0.3015  data: 0.0009  max mem: 27255
Epoch: [70] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003682  min_lr: 0.003682  loss: 3.6052 (3.7087)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5901 (nan)
Test:  [ 0/50]  eta: 0:02:18  loss: 0.7590 (0.7590)  acc1: 90.8000 (90.8000)  acc5: 98.0000 (98.0000)  time: 2.7620  data: 2.5780  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.0929 (1.1290)  acc1: 81.6000 (80.8727)  acc5: 95.6000 (95.7818)  time: 0.4397  data: 0.2742  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1326 (1.1520)  acc1: 78.0000 (79.3333)  acc5: 95.2000 (95.5810)  time: 0.1976  data: 0.0335  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2680 (1.2606)  acc1: 74.4000 (76.5548)  acc5: 92.0000 (93.8194)  time: 0.1805  data: 0.0157  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5677 (1.3340)  acc1: 68.8000 (74.3415)  acc5: 89.6000 (92.8781)  time: 0.1837  data: 0.0186  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5275 (1.3503)  acc1: 66.8000 (73.6960)  acc5: 89.6000 (92.6720)  time: 0.1794  data: 0.0150  max mem: 27255
Test: Total time: 0:00:11 (0.2390 s / it)
* Acc@1 73.818 Acc@5 92.572 loss 1.350
Accuracy of the model on the 50000 test images: 73.8%
Max accuracy: 73.88%
Epoch: [71]  [   0/2502]  eta: 1:34:19  lr: 0.003681  min_lr: 0.003681  loss: 3.6242 (3.6242)  weight_decay: 0.0500 (0.0500)  time: 2.2618  data: 1.4278  max mem: 27255
Epoch: [71]  [ 200/2502]  eta: 0:13:11  lr: 0.003681  min_lr: 0.003681  loss: 4.0356 (3.7561)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6392 (0.6343)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [71]  [ 400/2502]  eta: 0:11:51  lr: 0.003680  min_lr: 0.003680  loss: 3.9630 (3.7176)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5824 (0.6424)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [71]  [ 600/2502]  eta: 0:10:41  lr: 0.003679  min_lr: 0.003679  loss: 3.4280 (3.6884)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6171 (0.6384)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [71]  [ 800/2502]  eta: 0:09:32  lr: 0.003678  min_lr: 0.003678  loss: 3.6294 (3.6868)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6218 (0.6444)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [71]  [1000/2502]  eta: 0:08:25  lr: 0.003677  min_lr: 0.003677  loss: 3.8391 (3.6900)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6387 (0.6457)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [71]  [1200/2502]  eta: 0:07:17  lr: 0.003676  min_lr: 0.003676  loss: 3.7975 (3.7030)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6048 (0.6500)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [71]  [1400/2502]  eta: 0:06:09  lr: 0.003675  min_lr: 0.003675  loss: 3.8926 (3.7025)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6380 (0.6500)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [71]  [1600/2502]  eta: 0:05:02  lr: 0.003674  min_lr: 0.003674  loss: 3.7433 (3.7048)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6303 (0.6444)  time: 0.3367  data: 0.0005  max mem: 27255
Epoch: [71]  [1800/2502]  eta: 0:03:55  lr: 0.003673  min_lr: 0.003673  loss: 3.9347 (3.7006)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6682 (0.6447)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [71]  [2000/2502]  eta: 0:02:48  lr: 0.003672  min_lr: 0.003672  loss: 3.7661 (3.6998)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6052 (0.6432)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [71]  [2200/2502]  eta: 0:01:41  lr: 0.003671  min_lr: 0.003671  loss: 3.8894 (3.6912)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6860 (0.6469)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [71]  [2400/2502]  eta: 0:00:34  lr: 0.003670  min_lr: 0.003670  loss: 3.8369 (3.6910)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6506 (0.6468)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [71]  [2501/2502]  eta: 0:00:00  lr: 0.003669  min_lr: 0.003669  loss: 3.9721 (3.6900)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5915 (0.6452)  time: 0.2999  data: 0.0009  max mem: 27255
Epoch: [71] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.003669  min_lr: 0.003669  loss: 3.9721 (3.7099)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5915 (0.6452)
Test:  [ 0/50]  eta: 0:02:25  loss: 0.9104 (0.9104)  acc1: 88.8000 (88.8000)  acc5: 97.6000 (97.6000)  time: 2.9083  data: 2.7123  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 1.1465 (1.2000)  acc1: 80.8000 (79.1273)  acc5: 96.0000 (95.6000)  time: 0.4946  data: 0.3278  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 1.1508 (1.1977)  acc1: 78.8000 (78.8381)  acc5: 96.0000 (95.8286)  time: 0.2571  data: 0.0932  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.3791 (1.3165)  acc1: 74.4000 (76.1936)  acc5: 92.4000 (93.8065)  time: 0.2353  data: 0.0710  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6302 (1.4006)  acc1: 68.4000 (74.1171)  acc5: 89.2000 (92.7512)  time: 0.1950  data: 0.0276  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6302 (1.4176)  acc1: 68.4000 (73.5440)  acc5: 89.2000 (92.5040)  time: 0.1947  data: 0.0275  max mem: 27255
Test: Total time: 0:00:13 (0.2706 s / it)
* Acc@1 73.802 Acc@5 92.558 loss 1.419
Accuracy of the model on the 50000 test images: 73.8%
Max accuracy: 73.88%
Epoch: [72]  [   0/2502]  eta: 1:26:00  lr: 0.003669  min_lr: 0.003669  loss: 4.3300 (4.3300)  weight_decay: 0.0500 (0.0500)  time: 2.0625  data: 1.3355  max mem: 27255
Epoch: [72]  [ 200/2502]  eta: 0:13:10  lr: 0.003668  min_lr: 0.003668  loss: 3.8042 (3.7011)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6454 (0.6456)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [72]  [ 400/2502]  eta: 0:11:52  lr: 0.003667  min_lr: 0.003667  loss: 3.6651 (3.6742)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5717 (0.6302)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [72]  [ 600/2502]  eta: 0:10:41  lr: 0.003666  min_lr: 0.003666  loss: 3.6462 (3.6801)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6611 (0.6311)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [72]  [ 800/2502]  eta: 0:09:33  lr: 0.003665  min_lr: 0.003665  loss: 3.7037 (3.6812)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6278 (0.6314)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [72]  [1000/2502]  eta: 0:08:24  lr: 0.003664  min_lr: 0.003664  loss: 3.6398 (3.6747)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6245 (0.6334)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [72]  [1200/2502]  eta: 0:07:17  lr: 0.003663  min_lr: 0.003663  loss: 3.8948 (3.6771)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6453 (0.6387)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [72]  [1400/2502]  eta: 0:06:09  lr: 0.003662  min_lr: 0.003662  loss: 3.6264 (3.6830)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6226 (0.6444)  time: 0.3362  data: 0.0005  max mem: 27255
Epoch: [72]  [1600/2502]  eta: 0:05:02  lr: 0.003661  min_lr: 0.003661  loss: 3.8157 (3.6886)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6327 (0.6441)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [72]  [1800/2502]  eta: 0:03:55  lr: 0.003660  min_lr: 0.003660  loss: 3.7656 (3.6952)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6407 (0.6439)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [72]  [2000/2502]  eta: 0:02:48  lr: 0.003659  min_lr: 0.003659  loss: 3.9240 (3.6943)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6605 (0.6459)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [72]  [2200/2502]  eta: 0:01:41  lr: 0.003658  min_lr: 0.003658  loss: 3.6662 (3.6946)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5564 (0.6474)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [72]  [2400/2502]  eta: 0:00:34  lr: 0.003657  min_lr: 0.003657  loss: 3.7017 (3.6906)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6152 (0.6490)  time: 0.3367  data: 0.0005  max mem: 27255
Epoch: [72]  [2501/2502]  eta: 0:00:00  lr: 0.003657  min_lr: 0.003657  loss: 3.9600 (3.6895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6302 (0.6500)  time: 0.2999  data: 0.0009  max mem: 27255
Epoch: [72] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003657  min_lr: 0.003657  loss: 3.9600 (3.6892)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6302 (0.6500)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.8077 (0.8077)  acc1: 89.2000 (89.2000)  acc5: 98.4000 (98.4000)  time: 3.3099  data: 3.1085  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 1.1668 (1.1685)  acc1: 80.4000 (79.9636)  acc5: 95.6000 (95.4182)  time: 0.5233  data: 0.3557  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 1.1551 (1.1615)  acc1: 78.0000 (79.1429)  acc5: 96.4000 (95.5619)  time: 0.2368  data: 0.0723  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.3150 (1.2640)  acc1: 74.8000 (76.3226)  acc5: 91.2000 (93.8452)  time: 0.2288  data: 0.0618  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 1.5487 (1.3316)  acc1: 68.8000 (74.2634)  acc5: 89.2000 (92.8781)  time: 0.2150  data: 0.0480  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5096 (1.3493)  acc1: 68.4000 (73.5600)  acc5: 89.2000 (92.5920)  time: 0.2128  data: 0.0478  max mem: 27255
Test: Total time: 0:00:13 (0.2781 s / it)
* Acc@1 73.818 Acc@5 92.466 loss 1.350
Accuracy of the model on the 50000 test images: 73.8%
Max accuracy: 73.88%
Epoch: [73]  [   0/2502]  eta: 1:21:13  lr: 0.003657  min_lr: 0.003657  loss: 3.7242 (3.7242)  weight_decay: 0.0500 (0.0500)  time: 1.9479  data: 1.5432  max mem: 27255
Epoch: [73]  [ 200/2502]  eta: 0:13:13  lr: 0.003656  min_lr: 0.003656  loss: 3.8170 (3.6874)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6225 (0.6508)  time: 0.3382  data: 0.0005  max mem: 27255
Epoch: [73]  [ 400/2502]  eta: 0:11:54  lr: 0.003655  min_lr: 0.003655  loss: 3.8357 (3.6555)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6089 (0.6460)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [73]  [ 600/2502]  eta: 0:10:43  lr: 0.003654  min_lr: 0.003654  loss: 3.7503 (3.6550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6084 (0.6480)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [73]  [ 800/2502]  eta: 0:09:34  lr: 0.003653  min_lr: 0.003653  loss: 4.0447 (3.6681)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5696 (0.6445)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [73]  [1000/2502]  eta: 0:08:26  lr: 0.003652  min_lr: 0.003652  loss: 3.5575 (3.6645)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6416 (0.6433)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [73]  [1200/2502]  eta: 0:07:18  lr: 0.003651  min_lr: 0.003651  loss: 3.7531 (3.6738)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6820 (0.6470)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [73]  [1400/2502]  eta: 0:06:10  lr: 0.003650  min_lr: 0.003650  loss: 3.8245 (3.6766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5928 (0.6530)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [73]  [1600/2502]  eta: 0:05:03  lr: 0.003649  min_lr: 0.003649  loss: 3.5361 (3.6722)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6031 (0.6494)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [73]  [1800/2502]  eta: 0:03:55  lr: 0.003648  min_lr: 0.003648  loss: 3.8460 (3.6705)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6527 (0.6483)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [73]  [2000/2502]  eta: 0:02:48  lr: 0.003647  min_lr: 0.003647  loss: 3.9795 (3.6773)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6364 (0.6473)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [73]  [2200/2502]  eta: 0:01:41  lr: 0.003646  min_lr: 0.003646  loss: 3.8748 (3.6805)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6379 (0.6474)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [73]  [2400/2502]  eta: 0:00:34  lr: 0.003645  min_lr: 0.003645  loss: 3.8010 (3.6816)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5895 (0.6458)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [73]  [2501/2502]  eta: 0:00:00  lr: 0.003644  min_lr: 0.003644  loss: 3.7698 (3.6795)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6164 (0.6459)  time: 0.3005  data: 0.0008  max mem: 27255
Epoch: [73] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.003644  min_lr: 0.003644  loss: 3.7698 (3.6940)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6164 (0.6459)
Test:  [ 0/50]  eta: 0:02:14  loss: 0.7608 (0.7608)  acc1: 92.4000 (92.4000)  acc5: 98.0000 (98.0000)  time: 2.6903  data: 2.4851  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.1044 (1.1127)  acc1: 78.0000 (80.0364)  acc5: 95.6000 (95.7455)  time: 0.4239  data: 0.2561  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1044 (1.1263)  acc1: 77.6000 (79.3714)  acc5: 95.6000 (95.7524)  time: 0.2264  data: 0.0624  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2738 (1.2394)  acc1: 74.8000 (76.8129)  acc5: 92.8000 (93.9097)  time: 0.2382  data: 0.0738  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5604 (1.3143)  acc1: 70.4000 (74.9171)  acc5: 89.6000 (92.9659)  time: 0.2093  data: 0.0450  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5505 (1.3364)  acc1: 69.6000 (74.1280)  acc5: 90.4000 (92.7520)  time: 0.1809  data: 0.0171  max mem: 27255
Test: Total time: 0:00:12 (0.2598 s / it)
* Acc@1 74.056 Acc@5 92.610 loss 1.342
Accuracy of the model on the 50000 test images: 74.1%
Max accuracy: 74.06%
Epoch: [74]  [   0/2502]  eta: 1:21:38  lr: 0.003644  min_lr: 0.003644  loss: 4.0514 (4.0514)  weight_decay: 0.0500 (0.0500)  time: 1.9578  data: 1.6067  max mem: 27255
Epoch: [74]  [ 200/2502]  eta: 0:13:08  lr: 0.003643  min_lr: 0.003643  loss: 3.7647 (3.6896)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6076 (0.6076)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [74]  [ 400/2502]  eta: 0:11:53  lr: 0.003642  min_lr: 0.003642  loss: 3.8390 (3.6635)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5958 (0.6341)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [74]  [ 600/2502]  eta: 0:10:42  lr: 0.003641  min_lr: 0.003641  loss: 3.8563 (3.6664)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5851 (0.6348)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [74]  [ 800/2502]  eta: 0:09:33  lr: 0.003640  min_lr: 0.003640  loss: 4.0521 (3.6868)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6099 (0.6517)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [74]  [1000/2502]  eta: 0:08:25  lr: 0.003639  min_lr: 0.003639  loss: 3.8900 (3.6894)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6335 (0.6446)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [74]  [1200/2502]  eta: 0:07:17  lr: 0.003638  min_lr: 0.003638  loss: 3.7673 (3.6987)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6111 (0.6475)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [74]  [1400/2502]  eta: 0:06:10  lr: 0.003637  min_lr: 0.003637  loss: 3.9351 (3.6954)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6012 (0.6463)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [74]  [1600/2502]  eta: 0:05:02  lr: 0.003636  min_lr: 0.003636  loss: 3.7968 (3.6995)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6255 (0.6496)  time: 0.3368  data: 0.0004  max mem: 27255
Epoch: [74]  [1800/2502]  eta: 0:03:55  lr: 0.003635  min_lr: 0.003635  loss: 3.6686 (3.7021)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5764 (0.6457)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [74]  [2000/2502]  eta: 0:02:48  lr: 0.003634  min_lr: 0.003634  loss: 3.6959 (3.7006)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6338 (0.6458)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [74]  [2200/2502]  eta: 0:01:41  lr: 0.003633  min_lr: 0.003633  loss: 3.7541 (3.6916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6368 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [74]  [2400/2502]  eta: 0:00:34  lr: 0.003632  min_lr: 0.003632  loss: 3.8166 (3.6909)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7090 (inf)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [74]  [2501/2502]  eta: 0:00:00  lr: 0.003631  min_lr: 0.003631  loss: 3.8810 (3.6900)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6387 (inf)  time: 0.3003  data: 0.0008  max mem: 27255
Epoch: [74] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.003631  min_lr: 0.003631  loss: 3.8810 (3.6889)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6387 (inf)
Test:  [ 0/50]  eta: 0:02:13  loss: 0.7780 (0.7780)  acc1: 91.2000 (91.2000)  acc5: 98.0000 (98.0000)  time: 2.6717  data: 2.4655  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.0796 (1.1210)  acc1: 82.4000 (80.5091)  acc5: 95.6000 (95.5636)  time: 0.4506  data: 0.2829  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 1.1485 (1.1506)  acc1: 78.8000 (79.6571)  acc5: 95.6000 (95.8095)  time: 0.2661  data: 0.1006  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.3711 (1.2835)  acc1: 72.0000 (76.4645)  acc5: 93.2000 (93.7032)  time: 0.2718  data: 0.1058  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6078 (1.3683)  acc1: 69.6000 (74.3902)  acc5: 88.4000 (92.5171)  time: 0.2063  data: 0.0421  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5822 (1.3774)  acc1: 69.2000 (73.8480)  acc5: 88.8000 (92.4080)  time: 0.1683  data: 0.0046  max mem: 27255
Test: Total time: 0:00:13 (0.2736 s / it)
* Acc@1 74.246 Acc@5 92.522 loss 1.377
Accuracy of the model on the 50000 test images: 74.2%
Max accuracy: 74.25%
Epoch: [75]  [   0/2502]  eta: 1:27:12  lr: 0.003631  min_lr: 0.003631  loss: 2.5637 (2.5637)  weight_decay: 0.0500 (0.0500)  time: 2.0915  data: 1.7489  max mem: 27255
Epoch: [75]  [ 200/2502]  eta: 0:13:09  lr: 0.003630  min_lr: 0.003630  loss: 3.9916 (3.6581)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6083 (0.6586)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [75]  [ 400/2502]  eta: 0:11:52  lr: 0.003629  min_lr: 0.003629  loss: 3.4524 (3.6536)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5760 (0.6397)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [75]  [ 600/2502]  eta: 0:10:42  lr: 0.003628  min_lr: 0.003628  loss: 3.5147 (3.6383)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6576 (0.6475)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [75]  [ 800/2502]  eta: 0:09:33  lr: 0.003627  min_lr: 0.003627  loss: 3.7910 (3.6314)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6047 (0.6499)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [75]  [1000/2502]  eta: 0:08:25  lr: 0.003626  min_lr: 0.003626  loss: 3.6777 (3.6455)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6005 (0.6458)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [75]  [1200/2502]  eta: 0:07:17  lr: 0.003625  min_lr: 0.003625  loss: 3.3224 (3.6501)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6150 (0.6552)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [75]  [1400/2502]  eta: 0:06:10  lr: 0.003624  min_lr: 0.003624  loss: 3.8059 (3.6502)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6135 (0.6560)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [75]  [1600/2502]  eta: 0:05:02  lr: 0.003623  min_lr: 0.003623  loss: 3.7668 (3.6615)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6024 (0.6523)  time: 0.3354  data: 0.0003  max mem: 27255
Epoch: [75]  [1800/2502]  eta: 0:03:55  lr: 0.003622  min_lr: 0.003622  loss: 4.0420 (3.6683)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5659 (0.6494)  time: 0.3393  data: 0.0004  max mem: 27255
Epoch: [75]  [2000/2502]  eta: 0:02:48  lr: 0.003621  min_lr: 0.003621  loss: 3.8676 (3.6736)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6664 (0.6503)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [75]  [2200/2502]  eta: 0:01:41  lr: 0.003620  min_lr: 0.003620  loss: 3.7894 (3.6780)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6252 (0.6489)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [75]  [2400/2502]  eta: 0:00:34  lr: 0.003619  min_lr: 0.003619  loss: 3.8892 (3.6807)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6084 (0.6489)  time: 0.3368  data: 0.0004  max mem: 27255
Epoch: [75]  [2501/2502]  eta: 0:00:00  lr: 0.003618  min_lr: 0.003618  loss: 3.7471 (3.6851)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5953 (0.6482)  time: 0.3000  data: 0.0008  max mem: 27255
Epoch: [75] Total time: 0:13:59 (0.3357 s / it)
Averaged stats: lr: 0.003618  min_lr: 0.003618  loss: 3.7471 (3.6856)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5953 (0.6482)
Test:  [ 0/50]  eta: 0:02:37  loss: 0.8252 (0.8252)  acc1: 90.0000 (90.0000)  acc5: 98.0000 (98.0000)  time: 3.1540  data: 2.9597  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.1296 (1.1664)  acc1: 79.2000 (79.6000)  acc5: 94.8000 (95.4182)  time: 0.4370  data: 0.2694  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1994 (1.1729)  acc1: 78.0000 (79.3333)  acc5: 94.8000 (95.6000)  time: 0.1998  data: 0.0354  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3355 (1.2989)  acc1: 75.2000 (76.8129)  acc5: 92.4000 (93.8452)  time: 0.2103  data: 0.0464  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6513 (1.3822)  acc1: 69.6000 (74.8390)  acc5: 89.6000 (92.8098)  time: 0.1814  data: 0.0176  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6343 (1.4092)  acc1: 68.0000 (73.9680)  acc5: 88.8000 (92.4240)  time: 0.1832  data: 0.0174  max mem: 27255
Test: Total time: 0:00:12 (0.2479 s / it)
* Acc@1 74.010 Acc@5 92.574 loss 1.414
Accuracy of the model on the 50000 test images: 74.0%
Max accuracy: 74.25%
Epoch: [76]  [   0/2502]  eta: 1:34:00  lr: 0.003618  min_lr: 0.003618  loss: 3.8067 (3.8067)  weight_decay: 0.0500 (0.0500)  time: 2.2545  data: 1.2826  max mem: 27255
Epoch: [76]  [ 200/2502]  eta: 0:13:10  lr: 0.003617  min_lr: 0.003617  loss: 3.7922 (3.6912)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5783 (0.6016)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [76]  [ 400/2502]  eta: 0:11:52  lr: 0.003616  min_lr: 0.003616  loss: 4.0281 (3.6908)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5809 (0.6397)  time: 0.3360  data: 0.0005  max mem: 27255
Epoch: [76]  [ 600/2502]  eta: 0:10:42  lr: 0.003615  min_lr: 0.003615  loss: 3.5770 (3.6799)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6049 (0.6342)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [76]  [ 800/2502]  eta: 0:09:32  lr: 0.003614  min_lr: 0.003614  loss: 3.9560 (3.6710)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6146 (0.6353)  time: 0.3329  data: 0.0005  max mem: 27255
Epoch: [76]  [1000/2502]  eta: 0:08:24  lr: 0.003613  min_lr: 0.003613  loss: 4.0137 (3.6784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6742 (0.6396)  time: 0.3360  data: 0.0005  max mem: 27255
Epoch: [76]  [1200/2502]  eta: 0:07:17  lr: 0.003612  min_lr: 0.003612  loss: 3.5194 (3.6787)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5948 (0.6392)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [76]  [1400/2502]  eta: 0:06:10  lr: 0.003611  min_lr: 0.003611  loss: 3.4650 (3.6854)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6640 (0.6399)  time: 0.3331  data: 0.0005  max mem: 27255
Epoch: [76]  [1600/2502]  eta: 0:05:02  lr: 0.003610  min_lr: 0.003610  loss: 3.4043 (3.6774)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6556 (0.6424)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [76]  [1800/2502]  eta: 0:03:55  lr: 0.003609  min_lr: 0.003609  loss: 3.6628 (3.6807)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5898 (0.6424)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [76]  [2000/2502]  eta: 0:02:48  lr: 0.003608  min_lr: 0.003608  loss: 3.9187 (3.6811)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6239 (0.6456)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [76]  [2200/2502]  eta: 0:01:41  lr: 0.003606  min_lr: 0.003606  loss: 3.5730 (3.6801)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6403 (0.6464)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [76]  [2400/2502]  eta: 0:00:34  lr: 0.003605  min_lr: 0.003605  loss: 4.0391 (3.6809)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6791 (0.6462)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [76]  [2501/2502]  eta: 0:00:00  lr: 0.003605  min_lr: 0.003605  loss: 3.4508 (3.6800)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6041 (0.6465)  time: 0.3010  data: 0.0008  max mem: 27255
Epoch: [76] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003605  min_lr: 0.003605  loss: 3.4508 (3.6857)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6041 (0.6465)
Test:  [ 0/50]  eta: 0:02:06  loss: 0.6423 (0.6423)  acc1: 90.0000 (90.0000)  acc5: 97.6000 (97.6000)  time: 2.5366  data: 2.3287  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.0230 (1.0086)  acc1: 82.0000 (80.5818)  acc5: 95.2000 (95.2727)  time: 0.4677  data: 0.2998  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0092 (1.0204)  acc1: 78.8000 (79.1810)  acc5: 95.2000 (95.5048)  time: 0.2563  data: 0.0924  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1749 (1.1458)  acc1: 73.2000 (76.5290)  acc5: 92.8000 (93.7677)  time: 0.2310  data: 0.0663  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4545 (1.2330)  acc1: 70.4000 (74.5854)  acc5: 88.8000 (92.7220)  time: 0.2105  data: 0.0443  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4545 (1.2567)  acc1: 69.6000 (74.1040)  acc5: 89.6000 (92.4720)  time: 0.1874  data: 0.0220  max mem: 27255
Test: Total time: 0:00:13 (0.2689 s / it)
* Acc@1 74.116 Acc@5 92.596 loss 1.258
Accuracy of the model on the 50000 test images: 74.1%
Max accuracy: 74.25%
Epoch: [77]  [   0/2502]  eta: 1:23:30  lr: 0.003605  min_lr: 0.003605  loss: 3.1464 (3.1464)  weight_decay: 0.0500 (0.0500)  time: 2.0028  data: 1.6528  max mem: 27255
Epoch: [77]  [ 200/2502]  eta: 0:13:08  lr: 0.003604  min_lr: 0.003604  loss: 3.5318 (3.5634)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6344 (0.6148)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [77]  [ 400/2502]  eta: 0:11:51  lr: 0.003603  min_lr: 0.003603  loss: 3.6802 (3.6335)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5776 (0.6204)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [77]  [ 600/2502]  eta: 0:10:40  lr: 0.003602  min_lr: 0.003602  loss: 3.9746 (3.6345)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5947 (0.6156)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [77]  [ 800/2502]  eta: 0:09:32  lr: 0.003601  min_lr: 0.003601  loss: 3.9353 (3.6402)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5976 (0.6198)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [77]  [1000/2502]  eta: 0:08:25  lr: 0.003599  min_lr: 0.003599  loss: 3.4811 (3.6439)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5959 (0.6268)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [77]  [1200/2502]  eta: 0:07:17  lr: 0.003598  min_lr: 0.003598  loss: 3.7999 (3.6363)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5977 (0.6302)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [77]  [1400/2502]  eta: 0:06:10  lr: 0.003597  min_lr: 0.003597  loss: 4.0273 (3.6475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6904 (0.6334)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [77]  [1600/2502]  eta: 0:05:02  lr: 0.003596  min_lr: 0.003596  loss: 3.3893 (3.6494)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6048 (0.6333)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [77]  [1800/2502]  eta: 0:03:55  lr: 0.003595  min_lr: 0.003595  loss: 3.9395 (3.6530)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6130 (0.6357)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [77]  [2000/2502]  eta: 0:02:48  lr: 0.003594  min_lr: 0.003594  loss: 3.8789 (3.6537)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6032 (0.6360)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [77]  [2200/2502]  eta: 0:01:41  lr: 0.003593  min_lr: 0.003593  loss: 3.7344 (3.6592)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6147 (0.6366)  time: 0.3413  data: 0.0004  max mem: 27255
Epoch: [77]  [2400/2502]  eta: 0:00:34  lr: 0.003592  min_lr: 0.003592  loss: 3.8675 (3.6632)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6232 (0.6362)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [77]  [2501/2502]  eta: 0:00:00  lr: 0.003591  min_lr: 0.003591  loss: 3.7312 (3.6644)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6213 (0.6369)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [77] Total time: 0:13:59 (0.3353 s / it)
Averaged stats: lr: 0.003591  min_lr: 0.003591  loss: 3.7312 (3.6700)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6213 (0.6369)
Test:  [ 0/50]  eta: 0:02:18  loss: 0.7074 (0.7074)  acc1: 90.0000 (90.0000)  acc5: 98.8000 (98.8000)  time: 2.7678  data: 2.5627  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.0814 (1.0963)  acc1: 81.6000 (80.6182)  acc5: 96.4000 (95.9273)  time: 0.4112  data: 0.2435  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1283 (1.1107)  acc1: 78.8000 (79.5619)  acc5: 96.0000 (95.7714)  time: 0.1858  data: 0.0219  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2722 (1.2248)  acc1: 73.6000 (77.0710)  acc5: 92.4000 (93.8581)  time: 0.2057  data: 0.0413  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5388 (1.3002)  acc1: 70.4000 (75.0342)  acc5: 89.2000 (92.8781)  time: 0.2053  data: 0.0401  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5353 (1.3246)  acc1: 69.2000 (74.3120)  acc5: 89.6000 (92.6400)  time: 0.1806  data: 0.0159  max mem: 27255
Test: Total time: 0:00:12 (0.2434 s / it)
* Acc@1 74.480 Acc@5 92.766 loss 1.324
Accuracy of the model on the 50000 test images: 74.5%
Max accuracy: 74.48%
Epoch: [78]  [   0/2502]  eta: 1:24:21  lr: 0.003591  min_lr: 0.003591  loss: 3.8677 (3.8677)  weight_decay: 0.0500 (0.0500)  time: 2.0228  data: 1.6797  max mem: 27255
Epoch: [78]  [ 200/2502]  eta: 0:13:06  lr: 0.003590  min_lr: 0.003590  loss: 3.9762 (3.6635)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6164 (0.6324)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [78]  [ 400/2502]  eta: 0:11:50  lr: 0.003589  min_lr: 0.003589  loss: 3.4400 (3.6379)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6115 (0.6496)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [78]  [ 600/2502]  eta: 0:10:40  lr: 0.003588  min_lr: 0.003588  loss: 3.9601 (3.6625)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6276 (0.6567)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [78]  [ 800/2502]  eta: 0:09:31  lr: 0.003587  min_lr: 0.003587  loss: 3.5298 (3.6579)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6600 (0.6496)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [78]  [1000/2502]  eta: 0:08:23  lr: 0.003586  min_lr: 0.003586  loss: 3.9440 (3.6665)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6742 (0.6517)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [78]  [1200/2502]  eta: 0:07:16  lr: 0.003585  min_lr: 0.003585  loss: 3.6995 (3.6749)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6628 (0.6558)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [78]  [1400/2502]  eta: 0:06:09  lr: 0.003584  min_lr: 0.003584  loss: 3.8558 (3.6780)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6010 (0.6581)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [78]  [1600/2502]  eta: 0:05:02  lr: 0.003583  min_lr: 0.003583  loss: 3.8819 (3.6766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6144 (0.6536)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [78]  [1800/2502]  eta: 0:03:55  lr: 0.003582  min_lr: 0.003582  loss: 3.9669 (3.6794)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6040 (0.6533)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [78]  [2000/2502]  eta: 0:02:48  lr: 0.003580  min_lr: 0.003580  loss: 3.7904 (3.6773)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5973 (0.6503)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [78]  [2200/2502]  eta: 0:01:41  lr: 0.003579  min_lr: 0.003579  loss: 3.5663 (3.6780)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5927 (0.6482)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [78]  [2400/2502]  eta: 0:00:34  lr: 0.003578  min_lr: 0.003578  loss: 3.4473 (3.6783)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6332 (0.6500)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [78]  [2501/2502]  eta: 0:00:00  lr: 0.003578  min_lr: 0.003578  loss: 3.3478 (3.6753)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5826 (0.6483)  time: 0.3012  data: 0.0009  max mem: 27255
Epoch: [78] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.003578  min_lr: 0.003578  loss: 3.3478 (3.6719)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5826 (0.6483)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.6514 (0.6514)  acc1: 90.0000 (90.0000)  acc5: 98.4000 (98.4000)  time: 2.8076  data: 2.6019  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9279 (1.0011)  acc1: 83.2000 (80.8364)  acc5: 95.2000 (95.5636)  time: 0.4052  data: 0.2370  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0151 (1.0382)  acc1: 78.4000 (79.6952)  acc5: 95.2000 (95.7143)  time: 0.1837  data: 0.0195  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2465 (1.1582)  acc1: 74.0000 (77.2258)  acc5: 91.2000 (93.8065)  time: 0.1998  data: 0.0358  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4808 (1.2404)  acc1: 70.0000 (75.2195)  acc5: 89.2000 (92.8293)  time: 0.1938  data: 0.0299  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4692 (1.2642)  acc1: 69.2000 (74.3520)  acc5: 89.6000 (92.6000)  time: 0.1826  data: 0.0188  max mem: 27255
Test: Total time: 0:00:12 (0.2404 s / it)
* Acc@1 74.444 Acc@5 92.714 loss 1.266
Accuracy of the model on the 50000 test images: 74.4%
Max accuracy: 74.48%
Epoch: [79]  [   0/2502]  eta: 1:32:34  lr: 0.003578  min_lr: 0.003578  loss: 2.7174 (2.7174)  weight_decay: 0.0500 (0.0500)  time: 2.2200  data: 1.4068  max mem: 27255
Epoch: [79]  [ 200/2502]  eta: 0:13:14  lr: 0.003577  min_lr: 0.003577  loss: 3.6856 (3.6164)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6459 (0.6407)  time: 0.3343  data: 0.0003  max mem: 27255
Epoch: [79]  [ 400/2502]  eta: 0:11:53  lr: 0.003575  min_lr: 0.003575  loss: 3.5916 (3.6518)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6486 (0.6494)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [79]  [ 600/2502]  eta: 0:10:42  lr: 0.003574  min_lr: 0.003574  loss: 3.6005 (3.6445)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5944 (0.6451)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [79]  [ 800/2502]  eta: 0:09:33  lr: 0.003573  min_lr: 0.003573  loss: 4.0246 (3.6697)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6038 (0.6392)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [79]  [1000/2502]  eta: 0:08:25  lr: 0.003572  min_lr: 0.003572  loss: 3.9312 (3.6747)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5943 (0.6369)  time: 0.3335  data: 0.0003  max mem: 27255
Epoch: [79]  [1200/2502]  eta: 0:07:17  lr: 0.003571  min_lr: 0.003571  loss: 3.6041 (3.6802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6539 (0.6393)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [79]  [1400/2502]  eta: 0:06:10  lr: 0.003570  min_lr: 0.003570  loss: 3.6850 (3.6800)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6759 (0.6414)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [79]  [1600/2502]  eta: 0:05:02  lr: 0.003569  min_lr: 0.003569  loss: 3.7906 (3.6789)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6269 (0.6448)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [79]  [1800/2502]  eta: 0:03:55  lr: 0.003568  min_lr: 0.003568  loss: 3.6930 (3.6801)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5732 (0.6454)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [79]  [2000/2502]  eta: 0:02:48  lr: 0.003567  min_lr: 0.003567  loss: 3.8797 (3.6735)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6064 (0.6442)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [79]  [2200/2502]  eta: 0:01:41  lr: 0.003565  min_lr: 0.003565  loss: 3.7382 (3.6742)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6101 (0.6442)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [79]  [2400/2502]  eta: 0:00:34  lr: 0.003564  min_lr: 0.003564  loss: 3.7143 (3.6740)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6514 (0.6456)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [79]  [2501/2502]  eta: 0:00:00  lr: 0.003564  min_lr: 0.003564  loss: 3.9789 (3.6758)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6115 (0.6455)  time: 0.3043  data: 0.0007  max mem: 27255
Epoch: [79] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.003564  min_lr: 0.003564  loss: 3.9789 (3.6764)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6115 (0.6455)
Test:  [ 0/50]  eta: 0:02:34  loss: 0.7593 (0.7593)  acc1: 91.2000 (91.2000)  acc5: 98.0000 (98.0000)  time: 3.0926  data: 2.8870  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.2259 (1.2046)  acc1: 80.0000 (80.1455)  acc5: 95.2000 (95.5273)  time: 0.4370  data: 0.2693  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.2191 (1.1947)  acc1: 77.6000 (79.1810)  acc5: 95.2000 (95.7524)  time: 0.2000  data: 0.0360  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3368 (1.2922)  acc1: 73.2000 (76.5936)  acc5: 93.2000 (94.0129)  time: 0.2233  data: 0.0594  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5430 (1.3593)  acc1: 70.4000 (74.6146)  acc5: 89.6000 (92.9366)  time: 0.2041  data: 0.0378  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5125 (1.3738)  acc1: 69.2000 (73.8640)  acc5: 89.6000 (92.6240)  time: 0.1887  data: 0.0191  max mem: 27255
Test: Total time: 0:00:12 (0.2566 s / it)
* Acc@1 74.256 Acc@5 92.582 loss 1.377
Accuracy of the model on the 50000 test images: 74.3%
Max accuracy: 74.48%
Epoch: [80]  [   0/2502]  eta: 1:36:46  lr: 0.003564  min_lr: 0.003564  loss: 3.0484 (3.0484)  weight_decay: 0.0500 (0.0500)  time: 2.3205  data: 1.3027  max mem: 27255
Epoch: [80]  [ 200/2502]  eta: 0:13:13  lr: 0.003563  min_lr: 0.003563  loss: 3.7034 (3.6438)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5889 (0.6071)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [80]  [ 400/2502]  eta: 0:11:52  lr: 0.003562  min_lr: 0.003562  loss: 3.7352 (3.6512)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6567 (0.6424)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [80]  [ 600/2502]  eta: 0:10:41  lr: 0.003560  min_lr: 0.003560  loss: 3.8820 (3.6445)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6263 (0.6423)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [80]  [ 800/2502]  eta: 0:09:33  lr: 0.003559  min_lr: 0.003559  loss: 3.6248 (3.6241)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6030 (0.6380)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [80]  [1000/2502]  eta: 0:08:25  lr: 0.003558  min_lr: 0.003558  loss: 3.5856 (3.6199)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6692 (0.6404)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [80]  [1200/2502]  eta: 0:07:17  lr: 0.003557  min_lr: 0.003557  loss: 3.3518 (3.6201)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6106 (0.6452)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [80]  [1400/2502]  eta: 0:06:09  lr: 0.003556  min_lr: 0.003556  loss: 3.2548 (3.6144)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6483 (0.6500)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [80]  [1600/2502]  eta: 0:05:02  lr: 0.003555  min_lr: 0.003555  loss: 3.5370 (3.6142)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6777 (0.6520)  time: 0.3328  data: 0.0005  max mem: 27255
Epoch: [80]  [1800/2502]  eta: 0:03:55  lr: 0.003554  min_lr: 0.003554  loss: 3.8225 (3.6207)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6226 (0.6545)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [80]  [2000/2502]  eta: 0:02:48  lr: 0.003553  min_lr: 0.003553  loss: 3.9905 (3.6280)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6405 (0.6567)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [80]  [2200/2502]  eta: 0:01:41  lr: 0.003551  min_lr: 0.003551  loss: 4.0374 (3.6363)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6183 (0.6555)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [80]  [2400/2502]  eta: 0:00:34  lr: 0.003550  min_lr: 0.003550  loss: 3.8219 (3.6412)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6231 (0.6546)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [80]  [2501/2502]  eta: 0:00:00  lr: 0.003550  min_lr: 0.003550  loss: 3.8799 (3.6400)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6189 (0.6544)  time: 0.2997  data: 0.0009  max mem: 27255
Epoch: [80] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003550  min_lr: 0.003550  loss: 3.8799 (3.6676)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6189 (0.6544)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.7930 (0.7930)  acc1: 89.6000 (89.6000)  acc5: 98.0000 (98.0000)  time: 2.8092  data: 2.6079  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.0032 (1.0603)  acc1: 81.6000 (80.4000)  acc5: 96.4000 (96.1091)  time: 0.4344  data: 0.2671  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0316 (1.0520)  acc1: 78.8000 (79.9048)  acc5: 96.0000 (96.0191)  time: 0.1938  data: 0.0298  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2126 (1.1875)  acc1: 76.8000 (77.3806)  acc5: 92.8000 (94.0258)  time: 0.2007  data: 0.0368  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5292 (1.2727)  acc1: 70.0000 (75.3463)  acc5: 89.2000 (92.9366)  time: 0.2021  data: 0.0382  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5292 (1.2942)  acc1: 70.0000 (74.7200)  acc5: 89.2000 (92.6480)  time: 0.1899  data: 0.0262  max mem: 27255
Test: Total time: 0:00:12 (0.2506 s / it)
* Acc@1 74.448 Acc@5 92.856 loss 1.304
Accuracy of the model on the 50000 test images: 74.4%
Max accuracy: 74.48%
Epoch: [81]  [   0/2502]  eta: 1:30:03  lr: 0.003550  min_lr: 0.003550  loss: 4.0533 (4.0533)  weight_decay: 0.0500 (0.0500)  time: 2.1595  data: 1.5453  max mem: 27255
Epoch: [81]  [ 200/2502]  eta: 0:13:11  lr: 0.003549  min_lr: 0.003549  loss: 3.5764 (3.6626)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6249 (0.6441)  time: 0.3411  data: 0.0003  max mem: 27255
Epoch: [81]  [ 400/2502]  eta: 0:11:53  lr: 0.003547  min_lr: 0.003547  loss: 3.5943 (3.6238)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5693 (0.6176)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [81]  [ 600/2502]  eta: 0:10:41  lr: 0.003546  min_lr: 0.003546  loss: 3.8818 (3.6375)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5751 (0.6270)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [81]  [ 800/2502]  eta: 0:09:33  lr: 0.003545  min_lr: 0.003545  loss: 3.6069 (3.6506)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5821 (inf)  time: 0.3440  data: 0.0005  max mem: 27255
Epoch: [81]  [1000/2502]  eta: 0:08:25  lr: 0.003544  min_lr: 0.003544  loss: 3.6408 (3.6600)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6292 (inf)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [81]  [1200/2502]  eta: 0:07:17  lr: 0.003543  min_lr: 0.003543  loss: 3.8513 (3.6607)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6718 (inf)  time: 0.3421  data: 0.0005  max mem: 27255
Epoch: [81]  [1400/2502]  eta: 0:06:10  lr: 0.003542  min_lr: 0.003542  loss: 3.6273 (3.6671)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7151 (inf)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [81]  [1600/2502]  eta: 0:05:02  lr: 0.003541  min_lr: 0.003541  loss: 3.5496 (3.6663)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6432 (inf)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [81]  [1800/2502]  eta: 0:03:55  lr: 0.003539  min_lr: 0.003539  loss: 3.9768 (3.6717)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5891 (inf)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [81]  [2000/2502]  eta: 0:02:48  lr: 0.003538  min_lr: 0.003538  loss: 3.5649 (3.6698)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6045 (inf)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [81]  [2200/2502]  eta: 0:01:41  lr: 0.003537  min_lr: 0.003537  loss: 3.5790 (3.6696)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6159 (inf)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [81]  [2400/2502]  eta: 0:00:34  lr: 0.003536  min_lr: 0.003536  loss: 3.8368 (3.6748)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6091 (inf)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [81]  [2501/2502]  eta: 0:00:00  lr: 0.003535  min_lr: 0.003535  loss: 3.4972 (3.6699)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5927 (inf)  time: 0.3000  data: 0.0008  max mem: 27255
Epoch: [81] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.003535  min_lr: 0.003535  loss: 3.4972 (3.6595)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5927 (inf)
Test:  [ 0/50]  eta: 0:02:39  loss: 0.7711 (0.7711)  acc1: 90.0000 (90.0000)  acc5: 98.0000 (98.0000)  time: 3.1874  data: 2.9902  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9786 (0.9995)  acc1: 80.0000 (81.2364)  acc5: 95.6000 (95.9273)  time: 0.4649  data: 0.2979  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9740 (0.9919)  acc1: 78.4000 (80.2667)  acc5: 95.6000 (96.0762)  time: 0.1843  data: 0.0152  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1833 (1.1229)  acc1: 74.4000 (77.7806)  acc5: 93.2000 (94.3484)  time: 0.1745  data: 0.0053  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4814 (1.2142)  acc1: 69.2000 (75.6488)  acc5: 90.0000 (93.2976)  time: 0.1940  data: 0.0301  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4814 (1.2390)  acc1: 68.4000 (74.7920)  acc5: 89.6000 (93.0480)  time: 0.1896  data: 0.0257  max mem: 27255
Test: Total time: 0:00:12 (0.2464 s / it)
* Acc@1 74.796 Acc@5 93.040 loss 1.241
Accuracy of the model on the 50000 test images: 74.8%
Max accuracy: 74.80%
Epoch: [82]  [   0/2502]  eta: 1:30:51  lr: 0.003535  min_lr: 0.003535  loss: 3.7933 (3.7933)  weight_decay: 0.0500 (0.0500)  time: 2.1791  data: 1.8392  max mem: 27255
Epoch: [82]  [ 200/2502]  eta: 0:13:12  lr: 0.003534  min_lr: 0.003534  loss: 3.7167 (3.6726)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6616 (0.6751)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [82]  [ 400/2502]  eta: 0:11:53  lr: 0.003533  min_lr: 0.003533  loss: 3.5962 (3.6867)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6965 (0.6615)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [82]  [ 600/2502]  eta: 0:10:42  lr: 0.003532  min_lr: 0.003532  loss: 3.7250 (3.6698)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6041 (0.6530)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [82]  [ 800/2502]  eta: 0:09:33  lr: 0.003531  min_lr: 0.003531  loss: 3.6946 (3.6595)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6714 (0.6566)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [82]  [1000/2502]  eta: 0:08:25  lr: 0.003530  min_lr: 0.003530  loss: 3.6489 (3.6677)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5941 (0.6601)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [82]  [1200/2502]  eta: 0:07:17  lr: 0.003528  min_lr: 0.003528  loss: 3.8872 (3.6712)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5689 (0.6596)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [82]  [1400/2502]  eta: 0:06:10  lr: 0.003527  min_lr: 0.003527  loss: 3.9649 (3.6822)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6173 (0.6553)  time: 0.3340  data: 0.0003  max mem: 27255
Epoch: [82]  [1600/2502]  eta: 0:05:02  lr: 0.003526  min_lr: 0.003526  loss: 3.8584 (3.6809)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6578 (0.6570)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [82]  [1800/2502]  eta: 0:03:55  lr: 0.003525  min_lr: 0.003525  loss: 3.7267 (3.6834)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6912 (0.6614)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [82]  [2000/2502]  eta: 0:02:48  lr: 0.003524  min_lr: 0.003524  loss: 3.5664 (3.6850)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6117 (0.6610)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [82]  [2200/2502]  eta: 0:01:41  lr: 0.003523  min_lr: 0.003523  loss: 3.8727 (3.6905)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5995 (0.6570)  time: 0.3365  data: 0.0004  max mem: 27255
Epoch: [82]  [2400/2502]  eta: 0:00:34  lr: 0.003522  min_lr: 0.003522  loss: 3.5866 (3.6828)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5750 (0.6574)  time: 0.3340  data: 0.0003  max mem: 27255
Epoch: [82]  [2501/2502]  eta: 0:00:00  lr: 0.003521  min_lr: 0.003521  loss: 3.4116 (3.6802)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6067 (0.6556)  time: 0.3006  data: 0.0008  max mem: 27255
Epoch: [82] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.003521  min_lr: 0.003521  loss: 3.4116 (3.6725)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6067 (0.6556)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.7509 (0.7509)  acc1: 89.2000 (89.2000)  acc5: 97.6000 (97.6000)  time: 2.8119  data: 2.6176  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.0260 (1.0642)  acc1: 80.4000 (80.2545)  acc5: 96.4000 (95.3091)  time: 0.4187  data: 0.2520  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0232 (1.0518)  acc1: 80.4000 (79.8476)  acc5: 96.0000 (95.7143)  time: 0.1983  data: 0.0344  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2411 (1.1735)  acc1: 74.4000 (77.3290)  acc5: 92.4000 (93.8581)  time: 0.2312  data: 0.0673  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5030 (1.2640)  acc1: 69.2000 (74.9854)  acc5: 89.6000 (92.8781)  time: 0.2458  data: 0.0821  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5128 (1.2834)  acc1: 69.2000 (74.2640)  acc5: 89.6000 (92.5840)  time: 0.2051  data: 0.0415  max mem: 27255
Test: Total time: 0:00:13 (0.2651 s / it)
* Acc@1 74.566 Acc@5 92.756 loss 1.275
Accuracy of the model on the 50000 test images: 74.6%
Max accuracy: 74.80%
Epoch: [83]  [   0/2502]  eta: 1:43:13  lr: 0.003521  min_lr: 0.003521  loss: 3.5357 (3.5357)  weight_decay: 0.0500 (0.0500)  time: 2.4754  data: 1.3027  max mem: 27255
Epoch: [83]  [ 200/2502]  eta: 0:13:14  lr: 0.003520  min_lr: 0.003520  loss: 3.7419 (3.6261)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6427 (0.6503)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [83]  [ 400/2502]  eta: 0:11:55  lr: 0.003519  min_lr: 0.003519  loss: 3.8217 (3.6246)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6192 (0.6464)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [83]  [ 600/2502]  eta: 0:10:45  lr: 0.003517  min_lr: 0.003517  loss: 3.8379 (3.6432)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6100 (0.6422)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [83]  [ 800/2502]  eta: 0:09:34  lr: 0.003516  min_lr: 0.003516  loss: 3.5683 (3.6419)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6294 (0.6411)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [83]  [1000/2502]  eta: 0:08:26  lr: 0.003515  min_lr: 0.003515  loss: 3.8639 (3.6393)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6970 (0.6460)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [83]  [1200/2502]  eta: 0:07:18  lr: 0.003514  min_lr: 0.003514  loss: 3.6432 (3.6425)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6336 (0.6502)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [83]  [1400/2502]  eta: 0:06:10  lr: 0.003513  min_lr: 0.003513  loss: 3.7918 (3.6549)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6543 (0.6489)  time: 0.3330  data: 0.0003  max mem: 27255
Epoch: [83]  [1600/2502]  eta: 0:05:03  lr: 0.003512  min_lr: 0.003512  loss: 3.9046 (3.6606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6520 (0.6487)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [83]  [1800/2502]  eta: 0:03:55  lr: 0.003510  min_lr: 0.003510  loss: 3.7098 (3.6607)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6652 (0.6482)  time: 0.3332  data: 0.0003  max mem: 27255
Epoch: [83]  [2000/2502]  eta: 0:02:48  lr: 0.003509  min_lr: 0.003509  loss: 3.8608 (3.6631)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5844 (0.6463)  time: 0.3342  data: 0.0003  max mem: 27255
Epoch: [83]  [2200/2502]  eta: 0:01:41  lr: 0.003508  min_lr: 0.003508  loss: 4.0169 (3.6671)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6539 (0.6469)  time: 0.3354  data: 0.0003  max mem: 27255
Epoch: [83]  [2400/2502]  eta: 0:00:34  lr: 0.003507  min_lr: 0.003507  loss: 3.8136 (3.6656)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6669 (0.6504)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [83]  [2501/2502]  eta: 0:00:00  lr: 0.003506  min_lr: 0.003506  loss: 3.9802 (3.6646)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5989 (0.6493)  time: 0.3004  data: 0.0007  max mem: 27255
Epoch: [83] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.003506  min_lr: 0.003506  loss: 3.9802 (3.6619)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5989 (0.6493)
Test:  [ 0/50]  eta: 0:02:13  loss: 0.7477 (0.7477)  acc1: 91.2000 (91.2000)  acc5: 98.8000 (98.8000)  time: 2.6685  data: 2.4709  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9705 (1.0522)  acc1: 81.6000 (80.3636)  acc5: 96.0000 (95.6364)  time: 0.4361  data: 0.2690  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 1.0442 (1.0722)  acc1: 79.6000 (79.7905)  acc5: 96.0000 (95.8095)  time: 0.2646  data: 0.1001  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2407 (1.1888)  acc1: 74.4000 (77.2258)  acc5: 92.8000 (94.0516)  time: 0.2694  data: 0.1046  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5301 (1.2697)  acc1: 68.4000 (75.0927)  acc5: 90.0000 (93.1317)  time: 0.2003  data: 0.0361  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5063 (1.2815)  acc1: 68.4000 (74.5520)  acc5: 90.0000 (93.0000)  time: 0.1732  data: 0.0095  max mem: 27255
Test: Total time: 0:00:13 (0.2717 s / it)
* Acc@1 74.646 Acc@5 92.896 loss 1.281
Accuracy of the model on the 50000 test images: 74.6%
Max accuracy: 74.80%
Epoch: [84]  [   0/2502]  eta: 1:26:13  lr: 0.003506  min_lr: 0.003506  loss: 3.8184 (3.8184)  weight_decay: 0.0500 (0.0500)  time: 2.0679  data: 1.5303  max mem: 27255
Epoch: [84]  [ 200/2502]  eta: 0:13:08  lr: 0.003505  min_lr: 0.003505  loss: 3.7737 (3.6217)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6116 (0.6507)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [84]  [ 400/2502]  eta: 0:11:51  lr: 0.003504  min_lr: 0.003504  loss: 3.7951 (3.6272)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6193 (0.6621)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [84]  [ 600/2502]  eta: 0:10:41  lr: 0.003503  min_lr: 0.003503  loss: 3.2980 (3.6358)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6101 (0.6536)  time: 0.3426  data: 0.0003  max mem: 27255
Epoch: [84]  [ 800/2502]  eta: 0:09:32  lr: 0.003502  min_lr: 0.003502  loss: 3.5144 (3.6226)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6123 (0.6509)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [84]  [1000/2502]  eta: 0:08:24  lr: 0.003500  min_lr: 0.003500  loss: 4.0045 (3.6356)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6401 (0.6482)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [84]  [1200/2502]  eta: 0:07:16  lr: 0.003499  min_lr: 0.003499  loss: 3.8292 (3.6421)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6191 (0.6466)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [84]  [1400/2502]  eta: 0:06:09  lr: 0.003498  min_lr: 0.003498  loss: 3.8780 (3.6355)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6600 (0.6463)  time: 0.3434  data: 0.0005  max mem: 27255
Epoch: [84]  [1600/2502]  eta: 0:05:02  lr: 0.003497  min_lr: 0.003497  loss: 3.7728 (3.6344)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6658 (0.6468)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [84]  [1800/2502]  eta: 0:03:55  lr: 0.003496  min_lr: 0.003496  loss: 3.8601 (3.6408)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6142 (0.6486)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [84]  [2000/2502]  eta: 0:02:48  lr: 0.003494  min_lr: 0.003494  loss: 4.1310 (3.6493)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6068 (0.6492)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [84]  [2200/2502]  eta: 0:01:41  lr: 0.003493  min_lr: 0.003493  loss: 3.6369 (3.6499)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6330 (0.6491)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [84]  [2400/2502]  eta: 0:00:34  lr: 0.003492  min_lr: 0.003492  loss: 3.9002 (3.6498)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6072 (0.6479)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [84]  [2501/2502]  eta: 0:00:00  lr: 0.003491  min_lr: 0.003491  loss: 3.9197 (3.6529)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5948 (0.6461)  time: 0.2999  data: 0.0008  max mem: 27255
Epoch: [84] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.003491  min_lr: 0.003491  loss: 3.9197 (3.6439)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5948 (0.6461)
Test:  [ 0/50]  eta: 0:02:10  loss: 0.7415 (0.7415)  acc1: 90.8000 (90.8000)  acc5: 98.0000 (98.0000)  time: 2.6154  data: 2.4169  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.0993 (1.1050)  acc1: 78.8000 (80.2182)  acc5: 96.4000 (95.7455)  time: 0.4192  data: 0.2520  max mem: 27255
Test:  [20/50]  eta: 0:00:08  loss: 1.1561 (1.1305)  acc1: 78.8000 (79.9429)  acc5: 95.2000 (96.0000)  time: 0.1820  data: 0.0180  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2733 (1.2560)  acc1: 74.4000 (76.9936)  acc5: 94.0000 (94.1161)  time: 0.1865  data: 0.0225  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5822 (1.3326)  acc1: 70.0000 (74.8293)  acc5: 89.2000 (92.9561)  time: 0.2019  data: 0.0372  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5606 (1.3463)  acc1: 69.2000 (74.2400)  acc5: 89.2000 (92.7920)  time: 0.1840  data: 0.0195  max mem: 27255
Test: Total time: 0:00:11 (0.2392 s / it)
* Acc@1 74.346 Acc@5 92.778 loss 1.344
Accuracy of the model on the 50000 test images: 74.3%
Max accuracy: 74.80%
Epoch: [85]  [   0/2502]  eta: 1:27:27  lr: 0.003491  min_lr: 0.003491  loss: 3.2948 (3.2948)  weight_decay: 0.0500 (0.0500)  time: 2.0973  data: 1.4909  max mem: 27255
Epoch: [85]  [ 200/2502]  eta: 0:13:10  lr: 0.003490  min_lr: 0.003490  loss: 3.6090 (3.6031)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6143 (0.6826)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [85]  [ 400/2502]  eta: 0:11:52  lr: 0.003489  min_lr: 0.003489  loss: 3.7061 (3.6422)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6560 (0.6971)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [85]  [ 600/2502]  eta: 0:10:41  lr: 0.003488  min_lr: 0.003488  loss: 3.7149 (3.6441)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6427 (0.6738)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [85]  [ 800/2502]  eta: 0:09:32  lr: 0.003487  min_lr: 0.003487  loss: 3.5494 (3.6293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5994 (0.6640)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [85]  [1000/2502]  eta: 0:08:24  lr: 0.003485  min_lr: 0.003485  loss: 3.9343 (3.6441)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6262 (0.6672)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [85]  [1200/2502]  eta: 0:07:17  lr: 0.003484  min_lr: 0.003484  loss: 3.8241 (3.6430)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6361 (0.6616)  time: 0.3410  data: 0.0004  max mem: 27255
Epoch: [85]  [1400/2502]  eta: 0:06:09  lr: 0.003483  min_lr: 0.003483  loss: 3.5587 (3.6393)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6222 (0.6543)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [85]  [1600/2502]  eta: 0:05:02  lr: 0.003482  min_lr: 0.003482  loss: 3.6682 (3.6369)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6880 (0.6589)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [85]  [1800/2502]  eta: 0:03:55  lr: 0.003481  min_lr: 0.003481  loss: 3.7025 (3.6363)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6542 (0.6558)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [85]  [2000/2502]  eta: 0:02:48  lr: 0.003479  min_lr: 0.003479  loss: 3.7130 (3.6401)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6166 (0.6569)  time: 0.3353  data: 0.0006  max mem: 27255
Epoch: [85]  [2200/2502]  eta: 0:01:41  lr: 0.003478  min_lr: 0.003478  loss: 3.7470 (3.6432)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6480 (0.6549)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [85]  [2400/2502]  eta: 0:00:34  lr: 0.003477  min_lr: 0.003477  loss: 3.6957 (3.6449)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6346 (0.6564)  time: 0.3327  data: 0.0003  max mem: 27255
Epoch: [85]  [2501/2502]  eta: 0:00:00  lr: 0.003476  min_lr: 0.003476  loss: 3.7419 (3.6444)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5899 (0.6554)  time: 0.2989  data: 0.0007  max mem: 27255
Epoch: [85] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.003476  min_lr: 0.003476  loss: 3.7419 (3.6534)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5899 (0.6554)
Test:  [ 0/50]  eta: 0:03:44  loss: 0.7981 (0.7981)  acc1: 87.6000 (87.6000)  acc5: 98.0000 (98.0000)  time: 4.4893  data: 4.2806  max mem: 27255
Test:  [10/50]  eta: 0:00:29  loss: 1.0079 (1.0659)  acc1: 80.8000 (80.0727)  acc5: 96.4000 (96.2182)  time: 0.7409  data: 0.5729  max mem: 27255
Test:  [20/50]  eta: 0:00:16  loss: 1.0667 (1.0621)  acc1: 79.6000 (79.7905)  acc5: 96.0000 (96.2286)  time: 0.3650  data: 0.1988  max mem: 27255
Test:  [30/50]  eta: 0:00:10  loss: 1.1522 (1.1640)  acc1: 74.0000 (77.3419)  acc5: 93.6000 (94.4387)  time: 0.3786  data: 0.2096  max mem: 27255
Test:  [40/50]  eta: 0:00:04  loss: 1.4696 (1.2420)  acc1: 70.8000 (75.0146)  acc5: 89.6000 (93.3854)  time: 0.2976  data: 0.1297  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4085 (1.2552)  acc1: 70.4000 (74.5200)  acc5: 90.4000 (93.2320)  time: 0.2958  data: 0.1297  max mem: 27255
Test: Total time: 0:00:19 (0.3860 s / it)
* Acc@1 74.676 Acc@5 93.002 loss 1.257
Accuracy of the model on the 50000 test images: 74.7%
Max accuracy: 74.80%
Epoch: [86]  [   0/2502]  eta: 1:23:39  lr: 0.003476  min_lr: 0.003476  loss: 2.9192 (2.9192)  weight_decay: 0.0500 (0.0500)  time: 2.0062  data: 1.5174  max mem: 27255
Epoch: [86]  [ 200/2502]  eta: 0:13:12  lr: 0.003475  min_lr: 0.003475  loss: 3.6647 (3.6722)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6459 (0.6383)  time: 0.3359  data: 0.0005  max mem: 27255
Epoch: [86]  [ 400/2502]  eta: 0:11:54  lr: 0.003474  min_lr: 0.003474  loss: 3.6978 (3.6085)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6461 (0.6444)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [86]  [ 600/2502]  eta: 0:10:42  lr: 0.003473  min_lr: 0.003473  loss: 3.9888 (3.6104)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6603 (0.6494)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [86]  [ 800/2502]  eta: 0:09:33  lr: 0.003472  min_lr: 0.003472  loss: 3.7104 (3.6153)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5967 (0.6453)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [86]  [1000/2502]  eta: 0:08:25  lr: 0.003470  min_lr: 0.003470  loss: 3.7085 (3.6204)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5937 (0.6426)  time: 0.3422  data: 0.0005  max mem: 27255
Epoch: [86]  [1200/2502]  eta: 0:07:17  lr: 0.003469  min_lr: 0.003469  loss: 3.6851 (3.6265)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6566 (0.6480)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [86]  [1400/2502]  eta: 0:06:10  lr: 0.003468  min_lr: 0.003468  loss: 3.8215 (3.6241)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7021 (0.6509)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [86]  [1600/2502]  eta: 0:05:02  lr: 0.003467  min_lr: 0.003467  loss: 3.8316 (3.6245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6411 (0.6516)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [86]  [1800/2502]  eta: 0:03:55  lr: 0.003465  min_lr: 0.003465  loss: 3.5634 (3.6265)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5809 (0.6541)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [86]  [2000/2502]  eta: 0:02:48  lr: 0.003464  min_lr: 0.003464  loss: 3.7138 (3.6319)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6360 (0.6519)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [86]  [2200/2502]  eta: 0:01:41  lr: 0.003463  min_lr: 0.003463  loss: 3.6431 (3.6358)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6246 (0.6525)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [86]  [2400/2502]  eta: 0:00:34  lr: 0.003462  min_lr: 0.003462  loss: 3.6496 (3.6409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6137 (0.6505)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [86]  [2501/2502]  eta: 0:00:00  lr: 0.003461  min_lr: 0.003461  loss: 3.7012 (3.6410)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6543 (0.6508)  time: 0.3005  data: 0.0009  max mem: 27255
Epoch: [86] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003461  min_lr: 0.003461  loss: 3.7012 (3.6437)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6543 (0.6508)
Test:  [ 0/50]  eta: 0:02:42  loss: 0.7378 (0.7378)  acc1: 90.4000 (90.4000)  acc5: 99.2000 (99.2000)  time: 3.2586  data: 3.0524  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.9751 (1.0217)  acc1: 84.0000 (82.2182)  acc5: 94.8000 (95.9273)  time: 0.4916  data: 0.3229  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0308 (1.0471)  acc1: 80.4000 (80.4191)  acc5: 96.0000 (95.9429)  time: 0.2058  data: 0.0415  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2563 (1.1661)  acc1: 74.8000 (77.9226)  acc5: 93.2000 (94.0774)  time: 0.1929  data: 0.0285  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4846 (1.2463)  acc1: 70.0000 (75.7854)  acc5: 90.0000 (93.0537)  time: 0.1929  data: 0.0285  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4603 (1.2621)  acc1: 69.6000 (75.1760)  acc5: 90.4000 (93.0000)  time: 0.1919  data: 0.0284  max mem: 27255
Test: Total time: 0:00:12 (0.2562 s / it)
* Acc@1 75.018 Acc@5 93.068 loss 1.272
Accuracy of the model on the 50000 test images: 75.0%
Max accuracy: 75.02%
Epoch: [87]  [   0/2502]  eta: 1:31:30  lr: 0.003461  min_lr: 0.003461  loss: 3.8524 (3.8524)  weight_decay: 0.0500 (0.0500)  time: 2.1946  data: 1.8422  max mem: 27255
Epoch: [87]  [ 200/2502]  eta: 0:13:11  lr: 0.003460  min_lr: 0.003460  loss: 3.5998 (3.5883)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6800 (0.6891)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [87]  [ 400/2502]  eta: 0:11:53  lr: 0.003459  min_lr: 0.003459  loss: 3.7616 (3.6215)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6176 (0.6772)  time: 0.3361  data: 0.0004  max mem: 27255
Epoch: [87]  [ 600/2502]  eta: 0:10:43  lr: 0.003457  min_lr: 0.003457  loss: 3.6865 (3.6385)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5846 (0.6498)  time: 0.3361  data: 0.0005  max mem: 27255
Epoch: [87]  [ 800/2502]  eta: 0:09:33  lr: 0.003456  min_lr: 0.003456  loss: 3.7784 (3.6396)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6210 (0.6559)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [87]  [1000/2502]  eta: 0:08:25  lr: 0.003455  min_lr: 0.003455  loss: 3.8325 (3.6292)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6227 (0.6602)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [87]  [1200/2502]  eta: 0:07:18  lr: 0.003454  min_lr: 0.003454  loss: 3.7454 (3.6312)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6277 (0.6573)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [87]  [1400/2502]  eta: 0:06:10  lr: 0.003453  min_lr: 0.003453  loss: 3.7503 (3.6346)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6258 (0.6541)  time: 0.3324  data: 0.0004  max mem: 27255
Epoch: [87]  [1600/2502]  eta: 0:05:02  lr: 0.003451  min_lr: 0.003451  loss: 3.6057 (3.6380)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6338 (0.6545)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [87]  [1800/2502]  eta: 0:03:55  lr: 0.003450  min_lr: 0.003450  loss: 3.6500 (3.6346)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5949 (0.6562)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [87]  [2000/2502]  eta: 0:02:48  lr: 0.003449  min_lr: 0.003449  loss: 3.8785 (3.6388)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6837 (0.6594)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [87]  [2200/2502]  eta: 0:01:41  lr: 0.003448  min_lr: 0.003448  loss: 3.4647 (3.6358)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6101 (0.6588)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [87]  [2400/2502]  eta: 0:00:34  lr: 0.003446  min_lr: 0.003446  loss: 3.6175 (3.6327)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6079 (inf)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [87]  [2501/2502]  eta: 0:00:00  lr: 0.003446  min_lr: 0.003446  loss: 3.8309 (3.6308)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6482 (inf)  time: 0.3003  data: 0.0008  max mem: 27255
Epoch: [87] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.003446  min_lr: 0.003446  loss: 3.8309 (3.6348)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6482 (inf)
Test:  [ 0/50]  eta: 0:02:18  loss: 0.6943 (0.6943)  acc1: 89.6000 (89.6000)  acc5: 96.8000 (96.8000)  time: 2.7683  data: 2.5638  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9719 (1.0656)  acc1: 80.4000 (80.6182)  acc5: 96.4000 (95.5273)  time: 0.4349  data: 0.2672  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0298 (1.0622)  acc1: 79.2000 (79.9429)  acc5: 96.4000 (95.9619)  time: 0.2189  data: 0.0549  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2508 (1.1949)  acc1: 74.4000 (77.5097)  acc5: 93.2000 (94.3226)  time: 0.2557  data: 0.0869  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5287 (1.2823)  acc1: 70.0000 (75.3659)  acc5: 89.6000 (93.2878)  time: 0.2339  data: 0.0652  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5090 (1.2974)  acc1: 68.8000 (74.8000)  acc5: 90.4000 (93.0480)  time: 0.1786  data: 0.0145  max mem: 27255
Test: Total time: 0:00:13 (0.2680 s / it)
* Acc@1 74.786 Acc@5 93.108 loss 1.300
Accuracy of the model on the 50000 test images: 74.8%
Max accuracy: 75.02%
Epoch: [88]  [   0/2502]  eta: 1:34:48  lr: 0.003446  min_lr: 0.003446  loss: 4.4926 (4.4926)  weight_decay: 0.0500 (0.0500)  time: 2.2736  data: 1.8298  max mem: 27255
Epoch: [88]  [ 200/2502]  eta: 0:13:13  lr: 0.003444  min_lr: 0.003444  loss: 3.6684 (3.5876)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6662 (0.6533)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [88]  [ 400/2502]  eta: 0:11:52  lr: 0.003443  min_lr: 0.003443  loss: 3.6715 (3.6045)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6078 (0.6521)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [88]  [ 600/2502]  eta: 0:10:41  lr: 0.003442  min_lr: 0.003442  loss: 3.6644 (3.6329)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5922 (0.6446)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [88]  [ 800/2502]  eta: 0:09:32  lr: 0.003441  min_lr: 0.003441  loss: 3.3998 (3.6354)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6099 (0.6377)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [88]  [1000/2502]  eta: 0:08:24  lr: 0.003440  min_lr: 0.003440  loss: 3.5951 (3.6351)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6533 (0.6373)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [88]  [1200/2502]  eta: 0:07:17  lr: 0.003438  min_lr: 0.003438  loss: 3.7093 (3.6325)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5985 (0.6388)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [88]  [1400/2502]  eta: 0:06:10  lr: 0.003437  min_lr: 0.003437  loss: 3.9465 (3.6304)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6515 (0.6433)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [88]  [1600/2502]  eta: 0:05:02  lr: 0.003436  min_lr: 0.003436  loss: 3.8642 (3.6281)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6423 (0.6433)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [88]  [1800/2502]  eta: 0:03:55  lr: 0.003435  min_lr: 0.003435  loss: 3.7568 (3.6287)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6088 (0.6412)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [88]  [2000/2502]  eta: 0:02:48  lr: 0.003433  min_lr: 0.003433  loss: 3.8538 (3.6315)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5975 (0.6411)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [88]  [2200/2502]  eta: 0:01:41  lr: 0.003432  min_lr: 0.003432  loss: 3.7338 (3.6376)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6524 (0.6439)  time: 0.3423  data: 0.0006  max mem: 27255
Epoch: [88]  [2400/2502]  eta: 0:00:34  lr: 0.003431  min_lr: 0.003431  loss: 3.6497 (3.6362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6587 (0.6432)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [88]  [2501/2502]  eta: 0:00:00  lr: 0.003430  min_lr: 0.003430  loss: 3.8251 (3.6355)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5861 (0.6438)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [88] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003430  min_lr: 0.003430  loss: 3.8251 (3.6417)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5861 (0.6438)
Test:  [ 0/50]  eta: 0:02:54  loss: 0.7393 (0.7393)  acc1: 90.4000 (90.4000)  acc5: 97.6000 (97.6000)  time: 3.4943  data: 3.2924  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.0874 (1.0881)  acc1: 81.2000 (81.0545)  acc5: 96.4000 (95.7455)  time: 0.4675  data: 0.2998  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0874 (1.0878)  acc1: 78.4000 (80.1143)  acc5: 96.0000 (95.9238)  time: 0.1647  data: 0.0005  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2331 (1.1965)  acc1: 74.8000 (77.6000)  acc5: 93.6000 (94.2839)  time: 0.2057  data: 0.0417  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5148 (1.2759)  acc1: 70.8000 (75.5805)  acc5: 90.0000 (93.1902)  time: 0.2077  data: 0.0437  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5148 (1.2871)  acc1: 70.0000 (75.1280)  acc5: 90.4000 (93.0880)  time: 0.2073  data: 0.0435  max mem: 27255
Test: Total time: 0:00:12 (0.2501 s / it)
* Acc@1 74.844 Acc@5 93.194 loss 1.289
Accuracy of the model on the 50000 test images: 74.8%
Max accuracy: 75.02%
Epoch: [89]  [   0/2502]  eta: 1:43:06  lr: 0.003430  min_lr: 0.003430  loss: 3.3032 (3.3032)  weight_decay: 0.0500 (0.0500)  time: 2.4728  data: 1.5365  max mem: 27255
Epoch: [89]  [ 200/2502]  eta: 0:13:16  lr: 0.003429  min_lr: 0.003429  loss: 3.8022 (3.5961)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6383 (0.6675)  time: 0.3441  data: 0.0004  max mem: 27255
Epoch: [89]  [ 400/2502]  eta: 0:11:54  lr: 0.003428  min_lr: 0.003428  loss: 3.5578 (3.6219)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6366 (0.6493)  time: 0.3330  data: 0.0003  max mem: 27255
Epoch: [89]  [ 600/2502]  eta: 0:10:43  lr: 0.003426  min_lr: 0.003426  loss: 3.8058 (3.6269)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6686 (0.6575)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [89]  [ 800/2502]  eta: 0:09:33  lr: 0.003425  min_lr: 0.003425  loss: 3.7504 (3.6164)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6146 (0.6528)  time: 0.3350  data: 0.0003  max mem: 27255
Epoch: [89]  [1000/2502]  eta: 0:08:25  lr: 0.003424  min_lr: 0.003424  loss: 3.6007 (3.6267)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6513 (0.6522)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [89]  [1200/2502]  eta: 0:07:17  lr: 0.003423  min_lr: 0.003423  loss: 3.8836 (3.6251)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6283 (0.6540)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [89]  [1400/2502]  eta: 0:06:09  lr: 0.003421  min_lr: 0.003421  loss: 3.8645 (3.6301)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6453 (0.6556)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [89]  [1600/2502]  eta: 0:05:02  lr: 0.003420  min_lr: 0.003420  loss: 3.8677 (3.6348)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5822 (0.6537)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [89]  [1800/2502]  eta: 0:03:55  lr: 0.003419  min_lr: 0.003419  loss: 3.8135 (3.6331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5747 (0.6508)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [89]  [2000/2502]  eta: 0:02:48  lr: 0.003418  min_lr: 0.003418  loss: 3.8754 (3.6330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6411 (0.6521)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [89]  [2200/2502]  eta: 0:01:41  lr: 0.003416  min_lr: 0.003416  loss: 3.6529 (3.6306)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6360 (0.6505)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [89]  [2400/2502]  eta: 0:00:34  lr: 0.003415  min_lr: 0.003415  loss: 3.7410 (3.6263)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6258 (0.6498)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [89]  [2501/2502]  eta: 0:00:00  lr: 0.003414  min_lr: 0.003414  loss: 3.6583 (3.6252)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5984 (0.6481)  time: 0.2999  data: 0.0007  max mem: 27255
Epoch: [89] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.003414  min_lr: 0.003414  loss: 3.6583 (3.6357)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5984 (0.6481)
Test:  [ 0/50]  eta: 0:02:40  loss: 0.8098 (0.8098)  acc1: 90.8000 (90.8000)  acc5: 99.2000 (99.2000)  time: 3.2015  data: 3.0020  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.0519 (1.0785)  acc1: 80.8000 (80.9091)  acc5: 96.0000 (96.0000)  time: 0.4728  data: 0.3057  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 1.0941 (1.1024)  acc1: 78.8000 (79.9048)  acc5: 96.0000 (96.1714)  time: 0.2460  data: 0.0821  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2838 (1.2215)  acc1: 74.4000 (77.3548)  acc5: 94.0000 (94.5677)  time: 0.2633  data: 0.0994  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5409 (1.2991)  acc1: 70.4000 (75.5317)  acc5: 90.8000 (93.5707)  time: 0.2052  data: 0.0414  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5409 (1.3173)  acc1: 69.6000 (74.9360)  acc5: 90.4000 (93.2720)  time: 0.2050  data: 0.0413  max mem: 27255
Test: Total time: 0:00:13 (0.2759 s / it)
* Acc@1 74.900 Acc@5 93.150 loss 1.322
Accuracy of the model on the 50000 test images: 74.9%
Max accuracy: 75.02%
Epoch: [90]  [   0/2502]  eta: 1:33:17  lr: 0.003414  min_lr: 0.003414  loss: 3.6486 (3.6486)  weight_decay: 0.0500 (0.0500)  time: 2.2372  data: 1.8948  max mem: 27255
Epoch: [90]  [ 200/2502]  eta: 0:13:09  lr: 0.003413  min_lr: 0.003413  loss: 3.5446 (3.5724)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6820 (0.6487)  time: 0.3332  data: 0.0003  max mem: 27255
Epoch: [90]  [ 400/2502]  eta: 0:11:51  lr: 0.003412  min_lr: 0.003412  loss: 3.7525 (3.5946)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6750 (0.6639)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [90]  [ 600/2502]  eta: 0:10:41  lr: 0.003411  min_lr: 0.003411  loss: 3.6662 (3.5983)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6273 (0.6550)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [90]  [ 800/2502]  eta: 0:09:33  lr: 0.003409  min_lr: 0.003409  loss: 3.6469 (3.6028)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5945 (0.6488)  time: 0.3364  data: 0.0004  max mem: 27255
Epoch: [90]  [1000/2502]  eta: 0:08:25  lr: 0.003408  min_lr: 0.003408  loss: 3.7840 (3.6178)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6183 (0.6531)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [90]  [1200/2502]  eta: 0:07:17  lr: 0.003407  min_lr: 0.003407  loss: 3.6775 (3.6190)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6218 (0.6499)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [90]  [1400/2502]  eta: 0:06:09  lr: 0.003405  min_lr: 0.003405  loss: 3.8663 (3.6238)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6463 (0.6486)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [90]  [1600/2502]  eta: 0:05:02  lr: 0.003404  min_lr: 0.003404  loss: 3.9829 (3.6363)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5955 (0.6469)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [90]  [1800/2502]  eta: 0:03:55  lr: 0.003403  min_lr: 0.003403  loss: 3.3832 (3.6329)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6255 (0.6467)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [90]  [2000/2502]  eta: 0:02:48  lr: 0.003402  min_lr: 0.003402  loss: 3.6564 (3.6380)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6375 (0.6487)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [90]  [2200/2502]  eta: 0:01:41  lr: 0.003400  min_lr: 0.003400  loss: 3.6557 (3.6327)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6294 (0.6471)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [90]  [2400/2502]  eta: 0:00:34  lr: 0.003399  min_lr: 0.003399  loss: 3.7276 (3.6303)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6887 (0.6487)  time: 0.3327  data: 0.0003  max mem: 27255
Epoch: [90]  [2501/2502]  eta: 0:00:00  lr: 0.003398  min_lr: 0.003398  loss: 3.9524 (3.6325)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6341 (0.6490)  time: 0.2999  data: 0.0007  max mem: 27255
Epoch: [90] Total time: 0:13:57 (0.3346 s / it)
Averaged stats: lr: 0.003398  min_lr: 0.003398  loss: 3.9524 (3.6277)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6341 (0.6490)
Test:  [ 0/50]  eta: 0:02:19  loss: 0.7474 (0.7474)  acc1: 92.0000 (92.0000)  acc5: 99.2000 (99.2000)  time: 2.7818  data: 2.5865  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.0479 (1.1052)  acc1: 82.4000 (81.4182)  acc5: 96.0000 (96.2546)  time: 0.4083  data: 0.2415  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0591 (1.1039)  acc1: 78.8000 (80.1143)  acc5: 96.0000 (96.0762)  time: 0.1975  data: 0.0335  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2868 (1.2215)  acc1: 74.4000 (77.6000)  acc5: 92.0000 (94.2839)  time: 0.2292  data: 0.0652  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5874 (1.3026)  acc1: 70.4000 (75.5902)  acc5: 89.6000 (93.1805)  time: 0.2217  data: 0.0554  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5120 (1.3196)  acc1: 70.0000 (74.8800)  acc5: 90.0000 (93.0160)  time: 0.1883  data: 0.0202  max mem: 27255
Test: Total time: 0:00:12 (0.2552 s / it)
* Acc@1 74.830 Acc@5 93.002 loss 1.324
Accuracy of the model on the 50000 test images: 74.8%
Max accuracy: 75.02%
Epoch: [91]  [   0/2502]  eta: 1:30:09  lr: 0.003398  min_lr: 0.003398  loss: 2.3018 (2.3018)  weight_decay: 0.0500 (0.0500)  time: 2.1622  data: 1.6408  max mem: 27255
Epoch: [91]  [ 200/2502]  eta: 0:13:09  lr: 0.003397  min_lr: 0.003397  loss: 3.6795 (3.5840)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5695 (0.6768)  time: 0.3331  data: 0.0003  max mem: 27255
Epoch: [91]  [ 400/2502]  eta: 0:11:51  lr: 0.003396  min_lr: 0.003396  loss: 3.8074 (3.6037)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6215 (0.6511)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [91]  [ 600/2502]  eta: 0:10:42  lr: 0.003395  min_lr: 0.003395  loss: 3.7483 (3.6370)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6800 (0.6523)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [91]  [ 800/2502]  eta: 0:09:33  lr: 0.003393  min_lr: 0.003393  loss: 3.6519 (3.6313)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5946 (0.6475)  time: 0.3344  data: 0.0006  max mem: 27255
Epoch: [91]  [1000/2502]  eta: 0:08:25  lr: 0.003392  min_lr: 0.003392  loss: 3.6902 (3.6145)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6304 (0.6482)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [91]  [1200/2502]  eta: 0:07:17  lr: 0.003391  min_lr: 0.003391  loss: 3.8368 (3.6205)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6078 (0.6491)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [91]  [1400/2502]  eta: 0:06:10  lr: 0.003389  min_lr: 0.003389  loss: 3.7292 (3.6230)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6523 (0.6463)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [91]  [1600/2502]  eta: 0:05:02  lr: 0.003388  min_lr: 0.003388  loss: 3.7848 (3.6273)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5994 (0.6433)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [91]  [1800/2502]  eta: 0:03:55  lr: 0.003387  min_lr: 0.003387  loss: 3.4808 (3.6276)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6327 (0.6423)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [91]  [2000/2502]  eta: 0:02:48  lr: 0.003386  min_lr: 0.003386  loss: 3.7220 (3.6237)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7049 (0.6483)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [91]  [2200/2502]  eta: 0:01:41  lr: 0.003384  min_lr: 0.003384  loss: 3.6256 (3.6266)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6300 (0.6469)  time: 0.3335  data: 0.0006  max mem: 27255
Epoch: [91]  [2400/2502]  eta: 0:00:34  lr: 0.003383  min_lr: 0.003383  loss: 3.7450 (3.6251)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6535 (0.6499)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [91]  [2501/2502]  eta: 0:00:00  lr: 0.003382  min_lr: 0.003382  loss: 3.7452 (3.6270)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6251 (0.6502)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [91] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003382  min_lr: 0.003382  loss: 3.7452 (3.6277)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6251 (0.6502)
Test:  [ 0/50]  eta: 0:02:09  loss: 0.6426 (0.6426)  acc1: 89.2000 (89.2000)  acc5: 98.4000 (98.4000)  time: 2.5949  data: 2.3902  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9218 (0.9593)  acc1: 80.4000 (80.5455)  acc5: 95.6000 (95.6000)  time: 0.4472  data: 0.2796  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9549 (0.9768)  acc1: 78.4000 (79.3143)  acc5: 96.0000 (95.8286)  time: 0.2077  data: 0.0438  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1502 (1.0998)  acc1: 74.8000 (77.0581)  acc5: 93.2000 (94.3097)  time: 0.2417  data: 0.0765  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4191 (1.1876)  acc1: 68.4000 (75.1317)  acc5: 90.0000 (93.2585)  time: 0.2430  data: 0.0779  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4047 (1.2021)  acc1: 68.8000 (74.6720)  acc5: 90.0000 (93.0640)  time: 0.1746  data: 0.0110  max mem: 27255
Test: Total time: 0:00:13 (0.2633 s / it)
* Acc@1 75.130 Acc@5 93.006 loss 1.204
Accuracy of the model on the 50000 test images: 75.1%
Max accuracy: 75.13%
Epoch: [92]  [   0/2502]  eta: 1:16:05  lr: 0.003382  min_lr: 0.003382  loss: 4.1144 (4.1144)  weight_decay: 0.0500 (0.0500)  time: 1.8248  data: 1.4803  max mem: 27255
Epoch: [92]  [ 200/2502]  eta: 0:13:06  lr: 0.003381  min_lr: 0.003381  loss: 3.8236 (3.5367)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6309 (0.6365)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [92]  [ 400/2502]  eta: 0:11:51  lr: 0.003380  min_lr: 0.003380  loss: 3.9021 (3.5820)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6545 (0.6532)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [92]  [ 600/2502]  eta: 0:10:41  lr: 0.003378  min_lr: 0.003378  loss: 3.9649 (3.6101)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6852 (0.6601)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [92]  [ 800/2502]  eta: 0:09:32  lr: 0.003377  min_lr: 0.003377  loss: 3.6514 (3.6272)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6518 (0.6570)  time: 0.3373  data: 0.0004  max mem: 27255
Epoch: [92]  [1000/2502]  eta: 0:08:24  lr: 0.003376  min_lr: 0.003376  loss: 3.5835 (3.6254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6630 (0.6550)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [92]  [1200/2502]  eta: 0:07:17  lr: 0.003374  min_lr: 0.003374  loss: 3.6759 (3.6173)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6172 (0.6573)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [92]  [1400/2502]  eta: 0:06:10  lr: 0.003373  min_lr: 0.003373  loss: 3.8276 (3.6261)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6411 (0.6595)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [92]  [1600/2502]  eta: 0:05:02  lr: 0.003372  min_lr: 0.003372  loss: 3.9375 (3.6254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6229 (0.6587)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [92]  [1800/2502]  eta: 0:03:55  lr: 0.003371  min_lr: 0.003371  loss: 3.6979 (3.6251)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6577 (0.6585)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [92]  [2000/2502]  eta: 0:02:48  lr: 0.003369  min_lr: 0.003369  loss: 3.4489 (3.6251)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6095 (0.6572)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [92]  [2200/2502]  eta: 0:01:41  lr: 0.003368  min_lr: 0.003368  loss: 4.0701 (3.6319)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6299 (0.6586)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [92]  [2400/2502]  eta: 0:00:34  lr: 0.003367  min_lr: 0.003367  loss: 3.6678 (3.6324)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6360 (0.6586)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [92]  [2501/2502]  eta: 0:00:00  lr: 0.003366  min_lr: 0.003366  loss: 3.8856 (3.6353)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5939 (0.6568)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [92] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003366  min_lr: 0.003366  loss: 3.8856 (3.6116)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5939 (0.6568)
Test:  [ 0/50]  eta: 0:02:19  loss: 0.7637 (0.7637)  acc1: 90.0000 (90.0000)  acc5: 97.6000 (97.6000)  time: 2.7899  data: 2.5940  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9903 (1.0746)  acc1: 80.0000 (81.0182)  acc5: 96.0000 (95.6000)  time: 0.4049  data: 0.2376  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0329 (1.0716)  acc1: 79.6000 (80.3048)  acc5: 96.0000 (95.7905)  time: 0.2268  data: 0.0622  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1931 (1.1880)  acc1: 74.0000 (77.7419)  acc5: 93.6000 (94.0645)  time: 0.2548  data: 0.0904  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4960 (1.2656)  acc1: 70.4000 (75.5415)  acc5: 89.2000 (93.1122)  time: 0.2053  data: 0.0414  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4545 (1.2837)  acc1: 69.6000 (75.0080)  acc5: 90.8000 (93.0160)  time: 0.1768  data: 0.0130  max mem: 27255
Test: Total time: 0:00:13 (0.2604 s / it)
* Acc@1 74.894 Acc@5 93.016 loss 1.290
Accuracy of the model on the 50000 test images: 74.9%
Max accuracy: 75.13%
Epoch: [93]  [   0/2502]  eta: 1:21:36  lr: 0.003366  min_lr: 0.003366  loss: 4.1015 (4.1015)  weight_decay: 0.0500 (0.0500)  time: 1.9570  data: 1.6118  max mem: 27255
Epoch: [93]  [ 200/2502]  eta: 0:13:09  lr: 0.003365  min_lr: 0.003365  loss: 3.5456 (3.6366)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6472 (0.6559)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [93]  [ 400/2502]  eta: 0:11:51  lr: 0.003363  min_lr: 0.003363  loss: 3.6639 (3.6381)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6631 (0.6603)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [93]  [ 600/2502]  eta: 0:10:42  lr: 0.003362  min_lr: 0.003362  loss: 3.5896 (3.6409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6195 (0.6497)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [93]  [ 800/2502]  eta: 0:09:33  lr: 0.003361  min_lr: 0.003361  loss: 3.7978 (3.6242)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6391 (0.6488)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [93]  [1000/2502]  eta: 0:08:25  lr: 0.003359  min_lr: 0.003359  loss: 3.8769 (3.6284)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6449 (0.6530)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [93]  [1200/2502]  eta: 0:07:17  lr: 0.003358  min_lr: 0.003358  loss: 3.3685 (3.6173)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6881 (0.6551)  time: 0.3343  data: 0.0003  max mem: 27255
Epoch: [93]  [1400/2502]  eta: 0:06:10  lr: 0.003357  min_lr: 0.003357  loss: 3.6677 (3.6054)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6659 (0.6591)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [93]  [1600/2502]  eta: 0:05:02  lr: 0.003355  min_lr: 0.003355  loss: 3.8058 (3.6126)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6301 (0.6571)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [93]  [1800/2502]  eta: 0:03:55  lr: 0.003354  min_lr: 0.003354  loss: 3.5169 (3.6142)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6281 (0.6575)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [93]  [2000/2502]  eta: 0:02:48  lr: 0.003353  min_lr: 0.003353  loss: 3.9553 (3.6189)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6602 (0.6601)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [93]  [2200/2502]  eta: 0:01:41  lr: 0.003351  min_lr: 0.003351  loss: 3.8045 (3.6169)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6312 (0.6582)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [93]  [2400/2502]  eta: 0:00:34  lr: 0.003350  min_lr: 0.003350  loss: 3.8600 (3.6108)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6914 (0.6587)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [93]  [2501/2502]  eta: 0:00:00  lr: 0.003350  min_lr: 0.003350  loss: 3.9725 (3.6129)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6258 (0.6584)  time: 0.3009  data: 0.0008  max mem: 27255
Epoch: [93] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.003350  min_lr: 0.003350  loss: 3.9725 (3.6210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6258 (0.6584)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.7493 (0.7493)  acc1: 90.0000 (90.0000)  acc5: 97.2000 (97.2000)  time: 2.6594  data: 2.4527  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9172 (0.9944)  acc1: 80.8000 (81.4182)  acc5: 96.0000 (96.1091)  time: 0.4741  data: 0.3063  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0145 (1.0060)  acc1: 79.6000 (80.6667)  acc5: 96.0000 (96.4000)  time: 0.2231  data: 0.0591  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0831 (1.1405)  acc1: 76.0000 (78.0903)  acc5: 94.4000 (94.4774)  time: 0.2270  data: 0.0626  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4921 (1.2242)  acc1: 70.0000 (76.0293)  acc5: 89.2000 (93.3073)  time: 0.2179  data: 0.0536  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4324 (1.2386)  acc1: 70.8000 (75.6560)  acc5: 90.4000 (93.1600)  time: 0.1755  data: 0.0115  max mem: 27255
Test: Total time: 0:00:13 (0.2637 s / it)
* Acc@1 75.332 Acc@5 93.200 loss 1.239
Accuracy of the model on the 50000 test images: 75.3%
Max accuracy: 75.33%
Epoch: [94]  [   0/2502]  eta: 1:35:54  lr: 0.003350  min_lr: 0.003350  loss: 3.8619 (3.8619)  weight_decay: 0.0500 (0.0500)  time: 2.3001  data: 1.9571  max mem: 27255
Epoch: [94]  [ 200/2502]  eta: 0:13:11  lr: 0.003348  min_lr: 0.003348  loss: 3.8396 (3.5960)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6179 (0.6662)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [94]  [ 400/2502]  eta: 0:11:52  lr: 0.003347  min_lr: 0.003347  loss: 3.5489 (3.6159)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6779 (0.6535)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [94]  [ 600/2502]  eta: 0:10:41  lr: 0.003346  min_lr: 0.003346  loss: 4.0814 (3.6335)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6496 (0.6616)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [94]  [ 800/2502]  eta: 0:09:33  lr: 0.003344  min_lr: 0.003344  loss: 3.5082 (3.6165)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6953 (0.6577)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [94]  [1000/2502]  eta: 0:08:25  lr: 0.003343  min_lr: 0.003343  loss: 3.9342 (3.6266)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5923 (inf)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [94]  [1200/2502]  eta: 0:07:17  lr: 0.003342  min_lr: 0.003342  loss: 3.8078 (3.6200)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6370 (inf)  time: 0.3410  data: 0.0004  max mem: 27255
Epoch: [94]  [1400/2502]  eta: 0:06:09  lr: 0.003340  min_lr: 0.003340  loss: 3.9304 (3.6198)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6339 (inf)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [94]  [1600/2502]  eta: 0:05:02  lr: 0.003339  min_lr: 0.003339  loss: 3.6303 (3.6191)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6342 (inf)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [94]  [1800/2502]  eta: 0:03:55  lr: 0.003338  min_lr: 0.003338  loss: 3.6649 (3.6177)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6473 (inf)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [94]  [2000/2502]  eta: 0:02:48  lr: 0.003336  min_lr: 0.003336  loss: 3.5288 (3.6110)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6251 (inf)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [94]  [2200/2502]  eta: 0:01:41  lr: 0.003335  min_lr: 0.003335  loss: 3.5301 (3.6084)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5918 (inf)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [94]  [2400/2502]  eta: 0:00:34  lr: 0.003334  min_lr: 0.003334  loss: 3.6335 (3.6108)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6022 (inf)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [94]  [2501/2502]  eta: 0:00:00  lr: 0.003333  min_lr: 0.003333  loss: 3.5175 (3.6074)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6182 (inf)  time: 0.3008  data: 0.0009  max mem: 27255
Epoch: [94] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.003333  min_lr: 0.003333  loss: 3.5175 (3.6116)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6182 (inf)
Test:  [ 0/50]  eta: 0:02:21  loss: 0.6724 (0.6724)  acc1: 90.4000 (90.4000)  acc5: 98.0000 (98.0000)  time: 2.8260  data: 2.6344  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9723 (0.9758)  acc1: 81.2000 (81.6364)  acc5: 97.2000 (96.3636)  time: 0.4220  data: 0.2520  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9622 (0.9900)  acc1: 79.6000 (80.2667)  acc5: 96.4000 (96.4762)  time: 0.1739  data: 0.0071  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1198 (1.1170)  acc1: 75.2000 (77.8452)  acc5: 94.4000 (94.6452)  time: 0.1998  data: 0.0349  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4264 (1.2026)  acc1: 70.0000 (75.7268)  acc5: 90.0000 (93.4634)  time: 0.2336  data: 0.0699  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4253 (1.2148)  acc1: 70.0000 (75.2080)  acc5: 90.0000 (93.3200)  time: 0.1992  data: 0.0357  max mem: 27255
Test: Total time: 0:00:12 (0.2509 s / it)
* Acc@1 75.154 Acc@5 93.322 loss 1.217
Accuracy of the model on the 50000 test images: 75.2%
Max accuracy: 75.33%
Epoch: [95]  [   0/2502]  eta: 1:33:46  lr: 0.003333  min_lr: 0.003333  loss: 3.8122 (3.8122)  weight_decay: 0.0500 (0.0500)  time: 2.2487  data: 1.6924  max mem: 27255
Epoch: [95]  [ 200/2502]  eta: 0:13:13  lr: 0.003332  min_lr: 0.003332  loss: 3.7069 (3.6187)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5926 (0.6500)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [95]  [ 400/2502]  eta: 0:11:54  lr: 0.003330  min_lr: 0.003330  loss: 3.5826 (3.6198)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7230 (0.6649)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [95]  [ 600/2502]  eta: 0:10:42  lr: 0.003329  min_lr: 0.003329  loss: 3.5722 (3.5976)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6763 (0.6665)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [95]  [ 800/2502]  eta: 0:09:33  lr: 0.003327  min_lr: 0.003327  loss: 3.8499 (3.6034)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6446 (0.6645)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [95]  [1000/2502]  eta: 0:08:25  lr: 0.003326  min_lr: 0.003326  loss: 3.7131 (3.6065)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6269 (0.6615)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [95]  [1200/2502]  eta: 0:07:17  lr: 0.003325  min_lr: 0.003325  loss: 3.5649 (3.6054)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [95]  [1400/2502]  eta: 0:06:10  lr: 0.003323  min_lr: 0.003323  loss: 2.9917 (3.6040)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6733 (nan)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [95]  [1600/2502]  eta: 0:05:02  lr: 0.003322  min_lr: 0.003322  loss: 3.8219 (3.6068)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6516 (nan)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [95]  [1800/2502]  eta: 0:03:55  lr: 0.003321  min_lr: 0.003321  loss: 3.7764 (3.6144)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5929 (nan)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [95]  [2000/2502]  eta: 0:02:48  lr: 0.003319  min_lr: 0.003319  loss: 3.8415 (3.6160)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6423 (nan)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [95]  [2200/2502]  eta: 0:01:41  lr: 0.003318  min_lr: 0.003318  loss: 3.7995 (3.6186)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6466 (nan)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [95]  [2400/2502]  eta: 0:00:34  lr: 0.003317  min_lr: 0.003317  loss: 3.4217 (3.6137)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6273 (nan)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [95]  [2501/2502]  eta: 0:00:00  lr: 0.003316  min_lr: 0.003316  loss: 3.5315 (3.6120)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6280 (nan)  time: 0.3008  data: 0.0009  max mem: 27255
Epoch: [95] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.003316  min_lr: 0.003316  loss: 3.5315 (3.6135)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6280 (nan)
Test:  [ 0/50]  eta: 0:02:17  loss: 0.6867 (0.6867)  acc1: 91.2000 (91.2000)  acc5: 98.8000 (98.8000)  time: 2.7422  data: 2.5417  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9786 (1.0282)  acc1: 83.2000 (81.8182)  acc5: 96.4000 (96.2182)  time: 0.4206  data: 0.2532  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0068 (1.0357)  acc1: 80.4000 (80.4571)  acc5: 95.6000 (96.0571)  time: 0.1916  data: 0.0278  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1740 (1.1537)  acc1: 74.4000 (77.9097)  acc5: 93.2000 (94.4129)  time: 0.2258  data: 0.0613  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4481 (1.2259)  acc1: 71.6000 (76.1659)  acc5: 90.4000 (93.4634)  time: 0.2175  data: 0.0531  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3985 (1.2473)  acc1: 70.4000 (75.4080)  acc5: 90.4000 (93.2480)  time: 0.1709  data: 0.0074  max mem: 27255
Test: Total time: 0:00:12 (0.2499 s / it)
* Acc@1 75.276 Acc@5 93.262 loss 1.247
Accuracy of the model on the 50000 test images: 75.3%
Max accuracy: 75.33%
Epoch: [96]  [   0/2502]  eta: 1:18:38  lr: 0.003316  min_lr: 0.003316  loss: 3.5371 (3.5371)  weight_decay: 0.0500 (0.0500)  time: 1.8860  data: 1.3820  max mem: 27255
Epoch: [96]  [ 200/2502]  eta: 0:13:10  lr: 0.003315  min_lr: 0.003315  loss: 3.8420 (3.6220)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7086 (0.6907)  time: 0.3337  data: 0.0006  max mem: 27255
Epoch: [96]  [ 400/2502]  eta: 0:11:51  lr: 0.003313  min_lr: 0.003313  loss: 3.7462 (3.6195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7073 (0.6872)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [96]  [ 600/2502]  eta: 0:10:41  lr: 0.003312  min_lr: 0.003312  loss: 3.6890 (3.6051)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5882 (0.6663)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [96]  [ 800/2502]  eta: 0:09:32  lr: 0.003311  min_lr: 0.003311  loss: 3.7161 (3.6054)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6055 (0.6634)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [96]  [1000/2502]  eta: 0:08:24  lr: 0.003309  min_lr: 0.003309  loss: 3.8074 (3.6063)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6484 (0.6603)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [96]  [1200/2502]  eta: 0:07:17  lr: 0.003308  min_lr: 0.003308  loss: 3.7113 (3.6172)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6006 (0.6636)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [96]  [1400/2502]  eta: 0:06:09  lr: 0.003307  min_lr: 0.003307  loss: 3.6449 (3.6179)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6677 (0.6633)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [96]  [1600/2502]  eta: 0:05:02  lr: 0.003305  min_lr: 0.003305  loss: 3.8395 (3.6208)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6150 (0.6593)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [96]  [1800/2502]  eta: 0:03:55  lr: 0.003304  min_lr: 0.003304  loss: 3.6154 (3.6133)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6888 (0.6618)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [96]  [2000/2502]  eta: 0:02:48  lr: 0.003302  min_lr: 0.003302  loss: 3.7503 (3.6077)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6034 (0.6629)  time: 0.3361  data: 0.0004  max mem: 27255
Epoch: [96]  [2200/2502]  eta: 0:01:41  lr: 0.003301  min_lr: 0.003301  loss: 3.7843 (3.6093)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5801 (0.6595)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [96]  [2400/2502]  eta: 0:00:34  lr: 0.003300  min_lr: 0.003300  loss: 3.6795 (3.6078)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6679 (0.6597)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [96]  [2501/2502]  eta: 0:00:00  lr: 0.003299  min_lr: 0.003299  loss: 3.6476 (3.6080)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5806 (0.6585)  time: 0.3001  data: 0.0009  max mem: 27255
Epoch: [96] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.003299  min_lr: 0.003299  loss: 3.6476 (3.6054)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5806 (0.6585)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.7791 (0.7791)  acc1: 90.8000 (90.8000)  acc5: 98.8000 (98.8000)  time: 3.0135  data: 2.8166  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 1.0663 (1.1234)  acc1: 82.4000 (81.1636)  acc5: 96.0000 (95.8546)  time: 0.4939  data: 0.3270  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0943 (1.1160)  acc1: 79.6000 (80.5333)  acc5: 96.0000 (96.0762)  time: 0.2053  data: 0.0414  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2651 (1.2298)  acc1: 75.2000 (78.0258)  acc5: 93.2000 (94.2194)  time: 0.2023  data: 0.0383  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4992 (1.3026)  acc1: 70.4000 (75.7561)  acc5: 90.4000 (93.2390)  time: 0.2010  data: 0.0371  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4845 (1.3142)  acc1: 70.4000 (75.2480)  acc5: 90.8000 (93.1520)  time: 0.1651  data: 0.0013  max mem: 27255
Test: Total time: 0:00:12 (0.2543 s / it)
* Acc@1 75.244 Acc@5 93.194 loss 1.316
Accuracy of the model on the 50000 test images: 75.2%
Max accuracy: 75.33%
Epoch: [97]  [   0/2502]  eta: 1:33:54  lr: 0.003299  min_lr: 0.003299  loss: 3.0539 (3.0539)  weight_decay: 0.0500 (0.0500)  time: 2.2522  data: 1.9038  max mem: 27255
Epoch: [97]  [ 200/2502]  eta: 0:13:11  lr: 0.003298  min_lr: 0.003298  loss: 3.7950 (3.5350)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6451 (0.6679)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [97]  [ 400/2502]  eta: 0:11:53  lr: 0.003296  min_lr: 0.003296  loss: 3.6431 (3.5708)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6509 (0.6632)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [97]  [ 600/2502]  eta: 0:10:42  lr: 0.003295  min_lr: 0.003295  loss: 3.5293 (3.5717)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6091 (0.6531)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [97]  [ 800/2502]  eta: 0:09:33  lr: 0.003294  min_lr: 0.003294  loss: 3.6113 (3.5801)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6506 (0.6611)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [97]  [1000/2502]  eta: 0:08:25  lr: 0.003292  min_lr: 0.003292  loss: 3.9001 (3.5838)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6584 (0.6607)  time: 0.3366  data: 0.0005  max mem: 27255
Epoch: [97]  [1200/2502]  eta: 0:07:17  lr: 0.003291  min_lr: 0.003291  loss: 3.7190 (3.5822)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6534 (0.6619)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [97]  [1400/2502]  eta: 0:06:10  lr: 0.003289  min_lr: 0.003289  loss: 3.5144 (3.5757)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6627 (0.6598)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [97]  [1600/2502]  eta: 0:05:02  lr: 0.003288  min_lr: 0.003288  loss: 3.7494 (3.5807)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6162 (0.6602)  time: 0.3426  data: 0.0004  max mem: 27255
Epoch: [97]  [1800/2502]  eta: 0:03:55  lr: 0.003287  min_lr: 0.003287  loss: 3.5983 (3.5817)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6350 (0.6564)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [97]  [2000/2502]  eta: 0:02:48  lr: 0.003285  min_lr: 0.003285  loss: 3.5553 (3.5847)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6391 (0.6570)  time: 0.3344  data: 0.0003  max mem: 27255
Epoch: [97]  [2200/2502]  eta: 0:01:41  lr: 0.003284  min_lr: 0.003284  loss: 3.9504 (3.5893)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6500 (0.6563)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [97]  [2400/2502]  eta: 0:00:34  lr: 0.003283  min_lr: 0.003283  loss: 3.8397 (3.5896)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6390 (0.6562)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [97]  [2501/2502]  eta: 0:00:00  lr: 0.003282  min_lr: 0.003282  loss: 3.6930 (3.5928)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6667 (0.6564)  time: 0.3011  data: 0.0009  max mem: 27255
Epoch: [97] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.003282  min_lr: 0.003282  loss: 3.6930 (3.5916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6667 (0.6564)
Test:  [ 0/50]  eta: 0:02:21  loss: 0.7715 (0.7715)  acc1: 90.4000 (90.4000)  acc5: 98.8000 (98.8000)  time: 2.8234  data: 2.6214  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9606 (1.0231)  acc1: 82.0000 (82.8000)  acc5: 96.4000 (95.9636)  time: 0.4062  data: 0.2387  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0115 (1.0437)  acc1: 80.0000 (81.3143)  acc5: 96.4000 (96.1524)  time: 0.1865  data: 0.0225  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1909 (1.1664)  acc1: 76.0000 (78.3742)  acc5: 94.0000 (94.5161)  time: 0.2182  data: 0.0543  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5101 (1.2533)  acc1: 70.8000 (76.3024)  acc5: 90.8000 (93.5512)  time: 0.2212  data: 0.0575  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4786 (1.2684)  acc1: 70.4000 (75.9440)  acc5: 91.6000 (93.3600)  time: 0.1918  data: 0.0282  max mem: 27255
Test: Total time: 0:00:12 (0.2517 s / it)
* Acc@1 75.672 Acc@5 93.286 loss 1.275
Accuracy of the model on the 50000 test images: 75.7%
Max accuracy: 75.67%
Epoch: [98]  [   0/2502]  eta: 1:21:11  lr: 0.003282  min_lr: 0.003282  loss: 2.5387 (2.5387)  weight_decay: 0.0500 (0.0500)  time: 1.9471  data: 1.5984  max mem: 27255
Epoch: [98]  [ 200/2502]  eta: 0:13:06  lr: 0.003281  min_lr: 0.003281  loss: 3.7674 (3.5597)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6493 (0.6693)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [98]  [ 400/2502]  eta: 0:11:49  lr: 0.003279  min_lr: 0.003279  loss: 3.5379 (3.5494)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6559 (0.6598)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [98]  [ 600/2502]  eta: 0:10:39  lr: 0.003278  min_lr: 0.003278  loss: 3.6995 (3.5340)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6004 (0.6588)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [98]  [ 800/2502]  eta: 0:09:31  lr: 0.003276  min_lr: 0.003276  loss: 3.8344 (3.5632)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6576 (0.6534)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [98]  [1000/2502]  eta: 0:08:24  lr: 0.003275  min_lr: 0.003275  loss: 3.6457 (3.5861)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5895 (0.6494)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [98]  [1200/2502]  eta: 0:07:16  lr: 0.003274  min_lr: 0.003274  loss: 3.8277 (3.5829)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6263 (0.6486)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [98]  [1400/2502]  eta: 0:06:09  lr: 0.003272  min_lr: 0.003272  loss: 3.7207 (3.5916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6430 (0.6536)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [98]  [1600/2502]  eta: 0:05:02  lr: 0.003271  min_lr: 0.003271  loss: 3.9029 (3.5912)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6387 (0.6539)  time: 0.3415  data: 0.0004  max mem: 27255
Epoch: [98]  [1800/2502]  eta: 0:03:55  lr: 0.003269  min_lr: 0.003269  loss: 3.7075 (3.5937)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6748 (0.6575)  time: 0.3350  data: 0.0006  max mem: 27255
Epoch: [98]  [2000/2502]  eta: 0:02:48  lr: 0.003268  min_lr: 0.003268  loss: 3.4925 (3.5893)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6661 (0.6573)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [98]  [2200/2502]  eta: 0:01:41  lr: 0.003267  min_lr: 0.003267  loss: 3.6888 (3.5916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6151 (0.6605)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [98]  [2400/2502]  eta: 0:00:34  lr: 0.003265  min_lr: 0.003265  loss: 3.3627 (3.5865)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6131 (0.6595)  time: 0.3354  data: 0.0006  max mem: 27255
Epoch: [98]  [2501/2502]  eta: 0:00:00  lr: 0.003265  min_lr: 0.003265  loss: 3.6289 (3.5890)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6055 (0.6596)  time: 0.3005  data: 0.0009  max mem: 27255
Epoch: [98] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003265  min_lr: 0.003265  loss: 3.6289 (3.5938)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6055 (0.6596)
Test:  [ 0/50]  eta: 0:02:13  loss: 0.7163 (0.7163)  acc1: 91.6000 (91.6000)  acc5: 98.8000 (98.8000)  time: 2.6734  data: 2.4784  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9987 (1.0611)  acc1: 82.4000 (80.7636)  acc5: 96.0000 (95.9273)  time: 0.4175  data: 0.2509  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0781 (1.0937)  acc1: 79.2000 (79.9429)  acc5: 96.0000 (96.1333)  time: 0.2164  data: 0.0484  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2618 (1.2078)  acc1: 76.4000 (77.5097)  acc5: 93.2000 (93.9742)  time: 0.2032  data: 0.0346  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5123 (1.2849)  acc1: 70.0000 (75.5707)  acc5: 90.0000 (93.1317)  time: 0.2059  data: 0.0416  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4660 (1.2972)  acc1: 70.0000 (75.0080)  acc5: 90.8000 (92.9360)  time: 0.2164  data: 0.0527  max mem: 27255
Test: Total time: 0:00:12 (0.2583 s / it)
* Acc@1 75.094 Acc@5 93.020 loss 1.297
Accuracy of the model on the 50000 test images: 75.1%
Max accuracy: 75.67%
Epoch: [99]  [   0/2502]  eta: 1:37:38  lr: 0.003265  min_lr: 0.003265  loss: 3.1478 (3.1478)  weight_decay: 0.0500 (0.0500)  time: 2.3417  data: 1.4284  max mem: 27255
Epoch: [99]  [ 200/2502]  eta: 0:13:12  lr: 0.003263  min_lr: 0.003263  loss: 3.6637 (3.6062)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6342 (0.7215)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [99]  [ 400/2502]  eta: 0:11:53  lr: 0.003262  min_lr: 0.003262  loss: 3.8617 (3.6105)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6946 (0.7119)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [99]  [ 600/2502]  eta: 0:10:42  lr: 0.003260  min_lr: 0.003260  loss: 3.8520 (3.5958)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6364 (0.6980)  time: 0.3371  data: 0.0003  max mem: 27255
Epoch: [99]  [ 800/2502]  eta: 0:09:34  lr: 0.003259  min_lr: 0.003259  loss: 3.5556 (3.5978)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6613 (0.6895)  time: 0.3348  data: 0.0003  max mem: 27255
Epoch: [99]  [1000/2502]  eta: 0:08:25  lr: 0.003258  min_lr: 0.003258  loss: 3.6656 (3.5911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6826 (0.6896)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [99]  [1200/2502]  eta: 0:07:18  lr: 0.003256  min_lr: 0.003256  loss: 3.6083 (3.5847)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6220 (0.6806)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [99]  [1400/2502]  eta: 0:06:10  lr: 0.003255  min_lr: 0.003255  loss: 3.7705 (3.5895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6406 (0.6742)  time: 0.3330  data: 0.0003  max mem: 27255
Epoch: [99]  [1600/2502]  eta: 0:05:03  lr: 0.003253  min_lr: 0.003253  loss: 3.5346 (3.5912)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6486 (0.6735)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [99]  [1800/2502]  eta: 0:03:55  lr: 0.003252  min_lr: 0.003252  loss: 3.7725 (3.5885)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6184 (0.6714)  time: 0.3327  data: 0.0005  max mem: 27255
Epoch: [99]  [2000/2502]  eta: 0:02:48  lr: 0.003251  min_lr: 0.003251  loss: 3.7715 (3.5850)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6334 (0.6662)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [99]  [2200/2502]  eta: 0:01:41  lr: 0.003249  min_lr: 0.003249  loss: 3.7453 (3.5892)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6456 (0.6687)  time: 0.3329  data: 0.0005  max mem: 27255
Epoch: [99]  [2400/2502]  eta: 0:00:34  lr: 0.003248  min_lr: 0.003248  loss: 3.7729 (3.5918)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6511 (0.6691)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [99]  [2501/2502]  eta: 0:00:00  lr: 0.003247  min_lr: 0.003247  loss: 3.4859 (3.5879)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6592 (0.6717)  time: 0.2998  data: 0.0009  max mem: 27255
Epoch: [99] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003247  min_lr: 0.003247  loss: 3.4859 (3.5994)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6592 (0.6717)
Test:  [ 0/50]  eta: 0:02:41  loss: 0.7303 (0.7303)  acc1: 90.0000 (90.0000)  acc5: 98.0000 (98.0000)  time: 3.2396  data: 3.0446  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9705 (1.0151)  acc1: 80.0000 (81.2364)  acc5: 96.4000 (96.1091)  time: 0.4494  data: 0.2827  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9885 (1.0234)  acc1: 79.6000 (80.2667)  acc5: 96.4000 (96.2857)  time: 0.1747  data: 0.0108  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1336 (1.1242)  acc1: 74.0000 (77.7290)  acc5: 94.4000 (94.5548)  time: 0.1905  data: 0.0265  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4297 (1.2034)  acc1: 70.8000 (75.8146)  acc5: 90.0000 (93.4732)  time: 0.1970  data: 0.0330  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4150 (1.2091)  acc1: 70.0000 (75.3120)  acc5: 90.0000 (93.3840)  time: 0.1873  data: 0.0235  max mem: 27255
Test: Total time: 0:00:12 (0.2487 s / it)
* Acc@1 75.568 Acc@5 93.386 loss 1.212
Accuracy of the model on the 50000 test images: 75.6%
Max accuracy: 75.67%
Epoch: [100]  [   0/2502]  eta: 1:32:27  lr: 0.003247  min_lr: 0.003247  loss: 3.6893 (3.6893)  weight_decay: 0.0500 (0.0500)  time: 2.2174  data: 1.5278  max mem: 27255
Epoch: [100]  [ 200/2502]  eta: 0:13:10  lr: 0.003246  min_lr: 0.003246  loss: 3.5273 (3.5001)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6239 (0.6376)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [100]  [ 400/2502]  eta: 0:11:52  lr: 0.003244  min_lr: 0.003244  loss: 3.6202 (3.5429)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7282 (0.6573)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [100]  [ 600/2502]  eta: 0:10:42  lr: 0.003243  min_lr: 0.003243  loss: 3.4745 (3.5579)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6055 (0.6546)  time: 0.3421  data: 0.0005  max mem: 27255
Epoch: [100]  [ 800/2502]  eta: 0:09:33  lr: 0.003242  min_lr: 0.003242  loss: 3.5995 (3.5592)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5826 (0.6550)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [100]  [1000/2502]  eta: 0:08:26  lr: 0.003240  min_lr: 0.003240  loss: 3.8375 (3.5687)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6296 (0.6571)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [100]  [1200/2502]  eta: 0:07:18  lr: 0.003239  min_lr: 0.003239  loss: 3.6508 (3.5731)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6723 (0.6589)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [100]  [1400/2502]  eta: 0:06:10  lr: 0.003237  min_lr: 0.003237  loss: 3.7781 (3.5798)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6555 (0.6577)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [100]  [1600/2502]  eta: 0:05:03  lr: 0.003236  min_lr: 0.003236  loss: 3.9789 (3.5919)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6632 (0.6605)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [100]  [1800/2502]  eta: 0:03:55  lr: 0.003234  min_lr: 0.003234  loss: 3.7790 (3.5920)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6408 (0.6616)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [100]  [2000/2502]  eta: 0:02:48  lr: 0.003233  min_lr: 0.003233  loss: 3.5901 (3.5983)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7241 (0.6657)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [100]  [2200/2502]  eta: 0:01:41  lr: 0.003232  min_lr: 0.003232  loss: 3.8534 (3.5960)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6232 (0.6687)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [100]  [2400/2502]  eta: 0:00:34  lr: 0.003230  min_lr: 0.003230  loss: 3.8059 (3.6009)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6731 (0.6684)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [100]  [2501/2502]  eta: 0:00:00  lr: 0.003230  min_lr: 0.003230  loss: 3.6222 (3.5980)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6565 (0.6679)  time: 0.3000  data: 0.0009  max mem: 27255
Epoch: [100] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003230  min_lr: 0.003230  loss: 3.6222 (3.5884)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6565 (0.6679)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.7171 (0.7171)  acc1: 92.0000 (92.0000)  acc5: 98.0000 (98.0000)  time: 3.1286  data: 2.9232  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9952 (1.0402)  acc1: 83.2000 (82.2545)  acc5: 95.6000 (96.0000)  time: 0.4512  data: 0.2831  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0265 (1.0473)  acc1: 79.6000 (81.3714)  acc5: 96.0000 (96.1905)  time: 0.1832  data: 0.0191  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2062 (1.1578)  acc1: 76.4000 (78.8258)  acc5: 94.4000 (94.5419)  time: 0.1859  data: 0.0221  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4754 (1.2442)  acc1: 70.4000 (76.5659)  acc5: 90.0000 (93.5122)  time: 0.2095  data: 0.0459  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4754 (1.2597)  acc1: 70.0000 (75.9120)  acc5: 90.8000 (93.4160)  time: 0.2064  data: 0.0429  max mem: 27255
Test: Total time: 0:00:12 (0.2550 s / it)
* Acc@1 75.748 Acc@5 93.440 loss 1.263
Accuracy of the model on the 50000 test images: 75.7%
Max accuracy: 75.75%
Epoch: [101]  [   0/2502]  eta: 1:31:40  lr: 0.003230  min_lr: 0.003230  loss: 4.2938 (4.2938)  weight_decay: 0.0500 (0.0500)  time: 2.1984  data: 1.8489  max mem: 27255
Epoch: [101]  [ 200/2502]  eta: 0:13:09  lr: 0.003228  min_lr: 0.003228  loss: 3.7844 (3.5746)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6162 (0.6530)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [101]  [ 400/2502]  eta: 0:11:51  lr: 0.003227  min_lr: 0.003227  loss: 3.7509 (3.6053)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6717 (0.6551)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [101]  [ 600/2502]  eta: 0:10:41  lr: 0.003225  min_lr: 0.003225  loss: 3.6234 (3.5858)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6098 (0.6588)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [101]  [ 800/2502]  eta: 0:09:33  lr: 0.003224  min_lr: 0.003224  loss: 3.7879 (3.5983)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7215 (0.6684)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [101]  [1000/2502]  eta: 0:08:24  lr: 0.003222  min_lr: 0.003222  loss: 3.5469 (3.5896)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6381 (0.6699)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [101]  [1200/2502]  eta: 0:07:16  lr: 0.003221  min_lr: 0.003221  loss: 3.5235 (3.5810)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7027 (0.6716)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [101]  [1400/2502]  eta: 0:06:09  lr: 0.003220  min_lr: 0.003220  loss: 3.4537 (3.5834)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6569 (inf)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [101]  [1600/2502]  eta: 0:05:02  lr: 0.003218  min_lr: 0.003218  loss: 3.5733 (3.5876)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6374 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [101]  [1800/2502]  eta: 0:03:55  lr: 0.003217  min_lr: 0.003217  loss: 3.7430 (3.5874)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6729 (inf)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [101]  [2000/2502]  eta: 0:02:48  lr: 0.003215  min_lr: 0.003215  loss: 3.8846 (3.5872)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6351 (inf)  time: 0.3386  data: 0.0005  max mem: 27255
Epoch: [101]  [2200/2502]  eta: 0:01:41  lr: 0.003214  min_lr: 0.003214  loss: 3.7587 (3.5927)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6372 (inf)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [101]  [2400/2502]  eta: 0:00:34  lr: 0.003212  min_lr: 0.003212  loss: 3.6535 (3.5969)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6294 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [101]  [2501/2502]  eta: 0:00:00  lr: 0.003212  min_lr: 0.003212  loss: 3.4288 (3.5957)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7098 (inf)  time: 0.3008  data: 0.0007  max mem: 27255
Epoch: [101] Total time: 0:13:57 (0.3346 s / it)
Averaged stats: lr: 0.003212  min_lr: 0.003212  loss: 3.4288 (3.5850)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7098 (inf)
Test:  [ 0/50]  eta: 0:02:25  loss: 0.6887 (0.6887)  acc1: 89.6000 (89.6000)  acc5: 98.0000 (98.0000)  time: 2.9200  data: 2.7151  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9949 (0.9947)  acc1: 82.0000 (81.3091)  acc5: 96.4000 (96.1455)  time: 0.4593  data: 0.2913  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9949 (1.0011)  acc1: 80.0000 (80.4381)  acc5: 96.4000 (96.2667)  time: 0.2055  data: 0.0414  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1691 (1.1175)  acc1: 74.8000 (77.8452)  acc5: 94.0000 (94.4645)  time: 0.2007  data: 0.0367  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4250 (1.1953)  acc1: 70.4000 (76.0488)  acc5: 90.4000 (93.5317)  time: 0.2026  data: 0.0386  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4122 (1.2187)  acc1: 72.0000 (75.5760)  acc5: 90.8000 (93.2800)  time: 0.1912  data: 0.0274  max mem: 27255
Test: Total time: 0:00:12 (0.2531 s / it)
* Acc@1 75.506 Acc@5 93.296 loss 1.220
Accuracy of the model on the 50000 test images: 75.5%
Max accuracy: 75.75%
Epoch: [102]  [   0/2502]  eta: 1:33:23  lr: 0.003212  min_lr: 0.003212  loss: 2.6275 (2.6275)  weight_decay: 0.0500 (0.0500)  time: 2.2395  data: 1.4027  max mem: 27255
Epoch: [102]  [ 200/2502]  eta: 0:13:15  lr: 0.003210  min_lr: 0.003210  loss: 3.4708 (3.6056)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6594 (0.6843)  time: 0.3451  data: 0.0004  max mem: 27255
Epoch: [102]  [ 400/2502]  eta: 0:11:55  lr: 0.003209  min_lr: 0.003209  loss: 3.7209 (3.5966)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6507 (0.6801)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [102]  [ 600/2502]  eta: 0:10:44  lr: 0.003207  min_lr: 0.003207  loss: 3.7303 (3.6177)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6372 (0.6778)  time: 0.3365  data: 0.0005  max mem: 27255
Epoch: [102]  [ 800/2502]  eta: 0:09:34  lr: 0.003206  min_lr: 0.003206  loss: 3.9201 (3.6188)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5772 (0.6665)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [102]  [1000/2502]  eta: 0:08:25  lr: 0.003205  min_lr: 0.003205  loss: 3.7509 (3.6204)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6587 (0.6656)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [102]  [1200/2502]  eta: 0:07:18  lr: 0.003203  min_lr: 0.003203  loss: 3.5763 (3.6076)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6565 (0.6682)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [102]  [1400/2502]  eta: 0:06:10  lr: 0.003202  min_lr: 0.003202  loss: 3.7086 (3.6106)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6240 (0.6665)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [102]  [1600/2502]  eta: 0:05:03  lr: 0.003200  min_lr: 0.003200  loss: 3.5227 (3.6155)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7013 (0.6695)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [102]  [1800/2502]  eta: 0:03:55  lr: 0.003199  min_lr: 0.003199  loss: 3.2883 (3.6103)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6577 (0.6684)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [102]  [2000/2502]  eta: 0:02:48  lr: 0.003197  min_lr: 0.003197  loss: 3.7035 (3.6117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6272 (0.6679)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [102]  [2200/2502]  eta: 0:01:41  lr: 0.003196  min_lr: 0.003196  loss: 3.8136 (3.6081)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6554 (0.6652)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [102]  [2400/2502]  eta: 0:00:34  lr: 0.003195  min_lr: 0.003195  loss: 3.7028 (3.6053)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7403 (0.6667)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [102]  [2501/2502]  eta: 0:00:00  lr: 0.003194  min_lr: 0.003194  loss: 3.7035 (3.6024)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6415 (0.6667)  time: 0.3005  data: 0.0009  max mem: 27255
Epoch: [102] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.003194  min_lr: 0.003194  loss: 3.7035 (3.5884)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6415 (0.6667)
Test:  [ 0/50]  eta: 0:02:46  loss: 0.6747 (0.6747)  acc1: 88.8000 (88.8000)  acc5: 98.4000 (98.4000)  time: 3.3339  data: 3.1420  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9682 (1.0281)  acc1: 80.8000 (80.5455)  acc5: 97.2000 (96.5091)  time: 0.4576  data: 0.2905  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0551 (1.0495)  acc1: 79.6000 (80.0000)  acc5: 96.8000 (96.5143)  time: 0.1729  data: 0.0087  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2556 (1.1699)  acc1: 75.6000 (77.7936)  acc5: 93.6000 (94.7097)  time: 0.1946  data: 0.0308  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4657 (1.2511)  acc1: 70.0000 (75.6683)  acc5: 90.4000 (93.6293)  time: 0.2434  data: 0.0794  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4603 (1.2649)  acc1: 69.6000 (75.0640)  acc5: 91.2000 (93.4800)  time: 0.2421  data: 0.0783  max mem: 27255
Test: Total time: 0:00:13 (0.2644 s / it)
* Acc@1 75.294 Acc@5 93.318 loss 1.263
Accuracy of the model on the 50000 test images: 75.3%
Max accuracy: 75.75%
Epoch: [103]  [   0/2502]  eta: 1:32:14  lr: 0.003194  min_lr: 0.003194  loss: 3.1428 (3.1428)  weight_decay: 0.0500 (0.0500)  time: 2.2118  data: 1.7446  max mem: 27255
Epoch: [103]  [ 200/2502]  eta: 0:13:13  lr: 0.003192  min_lr: 0.003192  loss: 3.5968 (3.5741)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6789 (0.6651)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [103]  [ 400/2502]  eta: 0:11:53  lr: 0.003191  min_lr: 0.003191  loss: 3.4553 (3.5750)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6421 (0.6627)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [103]  [ 600/2502]  eta: 0:10:43  lr: 0.003190  min_lr: 0.003190  loss: 3.6996 (3.5745)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6875 (0.6630)  time: 0.3370  data: 0.0005  max mem: 27255
Epoch: [103]  [ 800/2502]  eta: 0:09:34  lr: 0.003188  min_lr: 0.003188  loss: 3.6984 (3.5600)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6163 (0.6598)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [103]  [1000/2502]  eta: 0:08:26  lr: 0.003187  min_lr: 0.003187  loss: 3.7103 (3.5629)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6691 (0.6637)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [103]  [1200/2502]  eta: 0:07:18  lr: 0.003185  min_lr: 0.003185  loss: 3.8584 (3.5696)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6109 (0.6618)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [103]  [1400/2502]  eta: 0:06:10  lr: 0.003184  min_lr: 0.003184  loss: 3.6235 (3.5800)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6530 (0.6608)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [103]  [1600/2502]  eta: 0:05:03  lr: 0.003182  min_lr: 0.003182  loss: 3.4810 (3.5794)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5989 (0.6584)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [103]  [1800/2502]  eta: 0:03:55  lr: 0.003181  min_lr: 0.003181  loss: 3.5178 (3.5826)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6801 (0.6582)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [103]  [2000/2502]  eta: 0:02:48  lr: 0.003179  min_lr: 0.003179  loss: 3.5386 (3.5876)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6211 (0.6545)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [103]  [2200/2502]  eta: 0:01:41  lr: 0.003178  min_lr: 0.003178  loss: 3.5831 (3.5813)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6650 (0.6578)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [103]  [2400/2502]  eta: 0:00:34  lr: 0.003177  min_lr: 0.003177  loss: 3.4369 (3.5813)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5976 (0.6551)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [103]  [2501/2502]  eta: 0:00:00  lr: 0.003176  min_lr: 0.003176  loss: 3.6875 (3.5835)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6504 (0.6544)  time: 0.3000  data: 0.0007  max mem: 27255
Epoch: [103] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003176  min_lr: 0.003176  loss: 3.6875 (3.5813)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6504 (0.6544)
Test:  [ 0/50]  eta: 0:02:24  loss: 0.7566 (0.7566)  acc1: 92.0000 (92.0000)  acc5: 98.8000 (98.8000)  time: 2.8907  data: 2.6938  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9976 (1.0678)  acc1: 81.6000 (81.2727)  acc5: 96.4000 (95.8909)  time: 0.4121  data: 0.2453  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0456 (1.0769)  acc1: 78.8000 (80.3048)  acc5: 96.0000 (95.8095)  time: 0.2065  data: 0.0426  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2792 (1.1979)  acc1: 74.4000 (77.6258)  acc5: 93.2000 (94.1548)  time: 0.2301  data: 0.0650  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4911 (1.2706)  acc1: 70.4000 (75.7073)  acc5: 90.8000 (93.4829)  time: 0.2051  data: 0.0400  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4911 (1.2921)  acc1: 68.8000 (75.1200)  acc5: 91.2000 (93.2160)  time: 0.1814  data: 0.0175  max mem: 27255
Test: Total time: 0:00:12 (0.2540 s / it)
* Acc@1 75.156 Acc@5 93.288 loss 1.294
Accuracy of the model on the 50000 test images: 75.2%
Max accuracy: 75.75%
Epoch: [104]  [   0/2502]  eta: 1:36:48  lr: 0.003176  min_lr: 0.003176  loss: 3.1144 (3.1144)  weight_decay: 0.0500 (0.0500)  time: 2.3215  data: 1.3336  max mem: 27255
Epoch: [104]  [ 200/2502]  eta: 0:13:12  lr: 0.003174  min_lr: 0.003174  loss: 3.7205 (3.5549)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6593 (0.6711)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [104]  [ 400/2502]  eta: 0:11:54  lr: 0.003173  min_lr: 0.003173  loss: 3.7489 (3.5548)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6434 (0.6798)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [104]  [ 600/2502]  eta: 0:10:42  lr: 0.003171  min_lr: 0.003171  loss: 3.7854 (3.5395)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6288 (0.6808)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [104]  [ 800/2502]  eta: 0:09:33  lr: 0.003170  min_lr: 0.003170  loss: 3.7808 (3.5513)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6484 (0.6752)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [104]  [1000/2502]  eta: 0:08:25  lr: 0.003169  min_lr: 0.003169  loss: 3.5695 (3.5506)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6529 (0.6756)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [104]  [1200/2502]  eta: 0:07:17  lr: 0.003167  min_lr: 0.003167  loss: 3.3968 (3.5445)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5969 (0.6742)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [104]  [1400/2502]  eta: 0:06:10  lr: 0.003166  min_lr: 0.003166  loss: 3.6499 (3.5514)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6495 (0.6702)  time: 0.3405  data: 0.0004  max mem: 27255
Epoch: [104]  [1600/2502]  eta: 0:05:02  lr: 0.003164  min_lr: 0.003164  loss: 3.8507 (3.5642)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5909 (0.6648)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [104]  [1800/2502]  eta: 0:03:55  lr: 0.003163  min_lr: 0.003163  loss: 3.3894 (3.5618)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6395 (0.6663)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [104]  [2000/2502]  eta: 0:02:48  lr: 0.003161  min_lr: 0.003161  loss: 3.8067 (3.5620)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6485 (0.6664)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [104]  [2200/2502]  eta: 0:01:41  lr: 0.003160  min_lr: 0.003160  loss: 3.7870 (3.5621)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6043 (0.6645)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [104]  [2400/2502]  eta: 0:00:34  lr: 0.003158  min_lr: 0.003158  loss: 3.7667 (3.5663)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6950 (0.6657)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [104]  [2501/2502]  eta: 0:00:00  lr: 0.003158  min_lr: 0.003158  loss: 3.6474 (3.5717)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6865 (0.6660)  time: 0.3058  data: 0.0008  max mem: 27255
Epoch: [104] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.003158  min_lr: 0.003158  loss: 3.6474 (3.5694)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6865 (0.6660)
Test:  [ 0/50]  eta: 0:02:22  loss: 0.7088 (0.7088)  acc1: 87.6000 (87.6000)  acc5: 98.8000 (98.8000)  time: 2.8548  data: 2.6612  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.0185 (1.0079)  acc1: 80.0000 (81.4182)  acc5: 96.4000 (96.1818)  time: 0.4324  data: 0.2660  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0423 (1.0319)  acc1: 79.6000 (80.5524)  acc5: 96.0000 (96.2857)  time: 0.1780  data: 0.0142  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2809 (1.1512)  acc1: 75.6000 (78.2839)  acc5: 93.2000 (94.5161)  time: 0.1974  data: 0.0335  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4864 (1.2260)  acc1: 71.2000 (76.4293)  acc5: 90.4000 (93.7463)  time: 0.2123  data: 0.0486  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4519 (1.2468)  acc1: 70.0000 (75.7120)  acc5: 91.6000 (93.5520)  time: 0.2013  data: 0.0377  max mem: 27255
Test: Total time: 0:00:12 (0.2458 s / it)
* Acc@1 75.872 Acc@5 93.494 loss 1.242
Accuracy of the model on the 50000 test images: 75.9%
Max accuracy: 75.87%
Epoch: [105]  [   0/2502]  eta: 1:17:06  lr: 0.003158  min_lr: 0.003158  loss: 3.8231 (3.8231)  weight_decay: 0.0500 (0.0500)  time: 1.8490  data: 1.4903  max mem: 27255
Epoch: [105]  [ 200/2502]  eta: 0:13:07  lr: 0.003156  min_lr: 0.003156  loss: 3.6627 (3.5597)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6869 (0.6692)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [105]  [ 400/2502]  eta: 0:11:50  lr: 0.003155  min_lr: 0.003155  loss: 3.7586 (3.5430)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6118 (0.6568)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [105]  [ 600/2502]  eta: 0:10:41  lr: 0.003153  min_lr: 0.003153  loss: 3.5873 (3.5775)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6721 (0.6658)  time: 0.3361  data: 0.0003  max mem: 27255
Epoch: [105]  [ 800/2502]  eta: 0:09:32  lr: 0.003152  min_lr: 0.003152  loss: 3.8236 (3.5763)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6267 (0.6649)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [105]  [1000/2502]  eta: 0:08:24  lr: 0.003150  min_lr: 0.003150  loss: 3.6153 (3.5775)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6033 (0.6637)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [105]  [1200/2502]  eta: 0:07:17  lr: 0.003149  min_lr: 0.003149  loss: 3.7462 (3.5831)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6569 (0.6610)  time: 0.3342  data: 0.0003  max mem: 27255
Epoch: [105]  [1400/2502]  eta: 0:06:10  lr: 0.003147  min_lr: 0.003147  loss: 3.7464 (3.5827)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5928 (0.6612)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [105]  [1600/2502]  eta: 0:05:02  lr: 0.003146  min_lr: 0.003146  loss: 3.5560 (3.5730)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6059 (0.6605)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [105]  [1800/2502]  eta: 0:03:55  lr: 0.003144  min_lr: 0.003144  loss: 3.7127 (3.5714)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6993 (0.6577)  time: 0.3340  data: 0.0003  max mem: 27255
Epoch: [105]  [2000/2502]  eta: 0:02:48  lr: 0.003143  min_lr: 0.003143  loss: 3.9644 (3.5730)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6188 (0.6582)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [105]  [2200/2502]  eta: 0:01:41  lr: 0.003141  min_lr: 0.003141  loss: 3.5321 (3.5733)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6451 (0.6573)  time: 0.3346  data: 0.0003  max mem: 27255
Epoch: [105]  [2400/2502]  eta: 0:00:34  lr: 0.003140  min_lr: 0.003140  loss: 3.7826 (3.5747)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5994 (0.6587)  time: 0.3340  data: 0.0003  max mem: 27255
Epoch: [105]  [2501/2502]  eta: 0:00:00  lr: 0.003139  min_lr: 0.003139  loss: 3.4568 (3.5746)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6778 (0.6610)  time: 0.3009  data: 0.0007  max mem: 27255
Epoch: [105] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.003139  min_lr: 0.003139  loss: 3.4568 (3.5785)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6778 (0.6610)
Test:  [ 0/50]  eta: 0:02:15  loss: 0.6159 (0.6159)  acc1: 89.6000 (89.6000)  acc5: 98.4000 (98.4000)  time: 2.7082  data: 2.5140  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9490 (0.9681)  acc1: 81.2000 (81.3455)  acc5: 95.6000 (95.7455)  time: 0.4168  data: 0.2492  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9876 (0.9744)  acc1: 80.4000 (81.1238)  acc5: 95.6000 (96.1143)  time: 0.2142  data: 0.0482  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1300 (1.0999)  acc1: 76.0000 (78.5419)  acc5: 94.8000 (94.4000)  time: 0.2301  data: 0.0621  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4173 (1.1777)  acc1: 70.8000 (76.1659)  acc5: 90.0000 (93.4146)  time: 0.2128  data: 0.0466  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3856 (1.1936)  acc1: 70.8000 (75.5680)  acc5: 91.2000 (93.3760)  time: 0.1851  data: 0.0213  max mem: 27255
Test: Total time: 0:00:12 (0.2564 s / it)
* Acc@1 75.744 Acc@5 93.458 loss 1.191
Accuracy of the model on the 50000 test images: 75.7%
Max accuracy: 75.87%
Epoch: [106]  [   0/2502]  eta: 1:26:48  lr: 0.003139  min_lr: 0.003139  loss: 4.1857 (4.1857)  weight_decay: 0.0500 (0.0500)  time: 2.0818  data: 1.5778  max mem: 27255
Epoch: [106]  [ 200/2502]  eta: 0:13:09  lr: 0.003138  min_lr: 0.003138  loss: 3.7468 (3.4850)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6744 (0.6751)  time: 0.3326  data: 0.0003  max mem: 27255
Epoch: [106]  [ 400/2502]  eta: 0:11:55  lr: 0.003136  min_lr: 0.003136  loss: 3.6662 (3.5361)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6063 (0.6631)  time: 0.3431  data: 0.0005  max mem: 27255
Epoch: [106]  [ 600/2502]  eta: 0:10:43  lr: 0.003135  min_lr: 0.003135  loss: 3.3907 (3.5156)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6762 (0.6622)  time: 0.3349  data: 0.0003  max mem: 27255
Epoch: [106]  [ 800/2502]  eta: 0:09:34  lr: 0.003133  min_lr: 0.003133  loss: 3.6109 (3.5195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6502 (0.6617)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [106]  [1000/2502]  eta: 0:08:25  lr: 0.003132  min_lr: 0.003132  loss: 3.6666 (3.5305)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6686 (0.6612)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [106]  [1200/2502]  eta: 0:07:17  lr: 0.003130  min_lr: 0.003130  loss: 3.5463 (3.5327)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6509 (0.6644)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [106]  [1400/2502]  eta: 0:06:10  lr: 0.003129  min_lr: 0.003129  loss: 3.8213 (3.5329)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6089 (0.6647)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [106]  [1600/2502]  eta: 0:05:02  lr: 0.003127  min_lr: 0.003127  loss: 3.8251 (3.5367)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7235 (0.6666)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [106]  [1800/2502]  eta: 0:03:55  lr: 0.003126  min_lr: 0.003126  loss: 3.4275 (3.5431)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6186 (0.6644)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [106]  [2000/2502]  eta: 0:02:48  lr: 0.003124  min_lr: 0.003124  loss: 3.6914 (3.5397)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7015 (0.6666)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [106]  [2200/2502]  eta: 0:01:41  lr: 0.003123  min_lr: 0.003123  loss: 3.3507 (3.5452)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6219 (0.6643)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [106]  [2400/2502]  eta: 0:00:34  lr: 0.003121  min_lr: 0.003121  loss: 3.5827 (3.5466)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6872 (0.6685)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [106]  [2501/2502]  eta: 0:00:00  lr: 0.003121  min_lr: 0.003121  loss: 3.5890 (3.5480)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6713 (0.6687)  time: 0.3003  data: 0.0007  max mem: 27255
Epoch: [106] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.003121  min_lr: 0.003121  loss: 3.5890 (3.5640)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6713 (0.6687)
Test:  [ 0/50]  eta: 0:03:06  loss: 0.7744 (0.7744)  acc1: 90.4000 (90.4000)  acc5: 98.8000 (98.8000)  time: 3.7256  data: 3.5381  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.9993 (1.0356)  acc1: 84.8000 (82.1091)  acc5: 95.6000 (96.0000)  time: 0.4886  data: 0.3221  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0156 (1.0433)  acc1: 80.4000 (81.2191)  acc5: 96.0000 (96.0571)  time: 0.1663  data: 0.0020  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2543 (1.1707)  acc1: 76.4000 (78.5032)  acc5: 92.8000 (94.3742)  time: 0.1756  data: 0.0117  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4607 (1.2406)  acc1: 72.0000 (76.5756)  acc5: 90.8000 (93.6098)  time: 0.2159  data: 0.0522  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4607 (1.2591)  acc1: 71.6000 (75.9040)  acc5: 91.2000 (93.5280)  time: 0.2059  data: 0.0424  max mem: 27255
Test: Total time: 0:00:12 (0.2586 s / it)
* Acc@1 75.764 Acc@5 93.484 loss 1.263
Accuracy of the model on the 50000 test images: 75.8%
Max accuracy: 75.87%
Epoch: [107]  [   0/2502]  eta: 1:15:55  lr: 0.003121  min_lr: 0.003121  loss: 3.8235 (3.8235)  weight_decay: 0.0500 (0.0500)  time: 1.8208  data: 1.3573  max mem: 27255
Epoch: [107]  [ 200/2502]  eta: 0:13:06  lr: 0.003119  min_lr: 0.003119  loss: 3.7180 (3.5964)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6425 (0.6482)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [107]  [ 400/2502]  eta: 0:11:50  lr: 0.003118  min_lr: 0.003118  loss: 3.4677 (3.5867)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6400 (0.6723)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [107]  [ 600/2502]  eta: 0:10:40  lr: 0.003116  min_lr: 0.003116  loss: 3.6847 (3.5761)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6108 (inf)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [107]  [ 800/2502]  eta: 0:09:32  lr: 0.003115  min_lr: 0.003115  loss: 3.8068 (3.5893)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6525 (inf)  time: 0.3327  data: 0.0003  max mem: 27255
Epoch: [107]  [1000/2502]  eta: 0:08:24  lr: 0.003113  min_lr: 0.003113  loss: 3.8588 (3.5797)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6641 (inf)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [107]  [1200/2502]  eta: 0:07:16  lr: 0.003112  min_lr: 0.003112  loss: 3.6676 (3.5840)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7025 (inf)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [107]  [1400/2502]  eta: 0:06:09  lr: 0.003110  min_lr: 0.003110  loss: 3.3753 (3.5791)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6546 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [107]  [1600/2502]  eta: 0:05:02  lr: 0.003109  min_lr: 0.003109  loss: 3.4887 (3.5819)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7255 (inf)  time: 0.3346  data: 0.0003  max mem: 27255
Epoch: [107]  [1800/2502]  eta: 0:03:55  lr: 0.003107  min_lr: 0.003107  loss: 3.6363 (3.5835)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6549 (inf)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [107]  [2000/2502]  eta: 0:02:48  lr: 0.003106  min_lr: 0.003106  loss: 3.6614 (3.5859)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6364 (inf)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [107]  [2200/2502]  eta: 0:01:41  lr: 0.003104  min_lr: 0.003104  loss: 3.8398 (3.5865)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6892 (inf)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [107]  [2400/2502]  eta: 0:00:34  lr: 0.003103  min_lr: 0.003103  loss: 3.5502 (3.5848)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6337 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [107]  [2501/2502]  eta: 0:00:00  lr: 0.003102  min_lr: 0.003102  loss: 3.9503 (3.5851)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6491 (inf)  time: 0.3000  data: 0.0008  max mem: 27255
Epoch: [107] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.003102  min_lr: 0.003102  loss: 3.9503 (3.5760)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6491 (inf)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.9132 (0.9132)  acc1: 89.6000 (89.6000)  acc5: 98.0000 (98.0000)  time: 2.6580  data: 2.4621  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.1267 (1.1806)  acc1: 81.2000 (82.2182)  acc5: 96.0000 (95.7818)  time: 0.4347  data: 0.2671  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.1867 (1.2001)  acc1: 80.4000 (81.0286)  acc5: 96.0000 (96.0762)  time: 0.2070  data: 0.0419  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.3542 (1.3195)  acc1: 75.2000 (78.2839)  acc5: 94.0000 (94.2452)  time: 0.2162  data: 0.0515  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.6156 (1.3922)  acc1: 71.2000 (76.2829)  acc5: 89.6000 (93.2390)  time: 0.1991  data: 0.0354  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.6017 (1.4055)  acc1: 70.8000 (75.5600)  acc5: 89.6000 (93.0240)  time: 0.1941  data: 0.0306  max mem: 27255
Test: Total time: 0:00:12 (0.2493 s / it)
* Acc@1 75.386 Acc@5 93.212 loss 1.409
Accuracy of the model on the 50000 test images: 75.4%
Max accuracy: 75.87%
Epoch: [108]  [   0/2502]  eta: 1:32:32  lr: 0.003102  min_lr: 0.003102  loss: 3.4497 (3.4497)  weight_decay: 0.0500 (0.0500)  time: 2.2194  data: 1.8767  max mem: 27255
Epoch: [108]  [ 200/2502]  eta: 0:13:12  lr: 0.003101  min_lr: 0.003101  loss: 3.5909 (3.5318)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6004 (0.6652)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [108]  [ 400/2502]  eta: 0:11:52  lr: 0.003099  min_lr: 0.003099  loss: 3.5616 (3.5364)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5723 (0.6567)  time: 0.3352  data: 0.0003  max mem: 27255
Epoch: [108]  [ 600/2502]  eta: 0:10:41  lr: 0.003098  min_lr: 0.003098  loss: 3.7791 (3.5355)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6676 (0.6629)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [108]  [ 800/2502]  eta: 0:09:32  lr: 0.003096  min_lr: 0.003096  loss: 2.9260 (3.5321)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6309 (0.6701)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [108]  [1000/2502]  eta: 0:08:24  lr: 0.003095  min_lr: 0.003095  loss: 3.4614 (3.5290)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6882 (0.6705)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [108]  [1200/2502]  eta: 0:07:17  lr: 0.003093  min_lr: 0.003093  loss: 3.4876 (3.5354)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6386 (0.6676)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [108]  [1400/2502]  eta: 0:06:09  lr: 0.003092  min_lr: 0.003092  loss: 3.4681 (3.5454)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6480 (0.6691)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [108]  [1600/2502]  eta: 0:05:02  lr: 0.003090  min_lr: 0.003090  loss: 3.7650 (3.5523)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6275 (0.6688)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [108]  [1800/2502]  eta: 0:03:55  lr: 0.003089  min_lr: 0.003089  loss: 3.6041 (3.5506)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6498 (0.6650)  time: 0.3356  data: 0.0005  max mem: 27255
Epoch: [108]  [2000/2502]  eta: 0:02:48  lr: 0.003087  min_lr: 0.003087  loss: 3.5935 (3.5543)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6191 (0.6663)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [108]  [2200/2502]  eta: 0:01:41  lr: 0.003085  min_lr: 0.003085  loss: 3.6777 (3.5584)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6285 (0.6647)  time: 0.3361  data: 0.0005  max mem: 27255
Epoch: [108]  [2400/2502]  eta: 0:00:34  lr: 0.003084  min_lr: 0.003084  loss: 3.4916 (3.5532)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6431 (0.6648)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [108]  [2501/2502]  eta: 0:00:00  lr: 0.003083  min_lr: 0.003083  loss: 3.4696 (3.5527)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6752 (0.6651)  time: 0.3011  data: 0.0013  max mem: 27255
Epoch: [108] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.003083  min_lr: 0.003083  loss: 3.4696 (3.5628)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6752 (0.6651)
Test:  [ 0/50]  eta: 0:02:21  loss: 0.6883 (0.6883)  acc1: 91.6000 (91.6000)  acc5: 98.0000 (98.0000)  time: 2.8329  data: 2.6297  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9580 (0.9546)  acc1: 81.2000 (81.8182)  acc5: 96.0000 (96.0000)  time: 0.4293  data: 0.2617  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9580 (0.9688)  acc1: 80.0000 (80.8381)  acc5: 96.0000 (96.2667)  time: 0.2174  data: 0.0535  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1282 (1.0778)  acc1: 75.2000 (78.4774)  acc5: 93.6000 (94.5806)  time: 0.2168  data: 0.0528  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3672 (1.1605)  acc1: 71.2000 (76.1854)  acc5: 90.0000 (93.4537)  time: 0.1880  data: 0.0242  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3672 (1.1807)  acc1: 70.0000 (75.6880)  acc5: 90.4000 (93.3120)  time: 0.1877  data: 0.0240  max mem: 27255
Test: Total time: 0:00:12 (0.2502 s / it)
* Acc@1 76.120 Acc@5 93.402 loss 1.177
Accuracy of the model on the 50000 test images: 76.1%
Max accuracy: 76.12%
Epoch: [109]  [   0/2502]  eta: 1:24:02  lr: 0.003083  min_lr: 0.003083  loss: 3.9260 (3.9260)  weight_decay: 0.0500 (0.0500)  time: 2.0155  data: 1.6611  max mem: 27255
Epoch: [109]  [ 200/2502]  eta: 0:13:13  lr: 0.003082  min_lr: 0.003082  loss: 3.7805 (3.4683)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6106 (0.6338)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [109]  [ 400/2502]  eta: 0:11:53  lr: 0.003080  min_lr: 0.003080  loss: 3.5317 (3.5104)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6575 (0.6624)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [109]  [ 600/2502]  eta: 0:10:41  lr: 0.003079  min_lr: 0.003079  loss: 3.4407 (3.5267)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6655 (0.6679)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [109]  [ 800/2502]  eta: 0:09:32  lr: 0.003077  min_lr: 0.003077  loss: 3.5818 (3.5288)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [109]  [1000/2502]  eta: 0:08:24  lr: 0.003076  min_lr: 0.003076  loss: 3.6617 (3.5361)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6180 (nan)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [109]  [1200/2502]  eta: 0:07:17  lr: 0.003074  min_lr: 0.003074  loss: 3.6798 (3.5425)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6639 (nan)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [109]  [1400/2502]  eta: 0:06:09  lr: 0.003073  min_lr: 0.003073  loss: 3.6204 (3.5471)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6606 (nan)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [109]  [1600/2502]  eta: 0:05:02  lr: 0.003071  min_lr: 0.003071  loss: 3.6323 (3.5507)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6592 (nan)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [109]  [1800/2502]  eta: 0:03:55  lr: 0.003070  min_lr: 0.003070  loss: 3.6644 (3.5536)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6236 (nan)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [109]  [2000/2502]  eta: 0:02:48  lr: 0.003068  min_lr: 0.003068  loss: 3.8516 (3.5531)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6219 (nan)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [109]  [2200/2502]  eta: 0:01:41  lr: 0.003067  min_lr: 0.003067  loss: 3.5680 (3.5524)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6576 (nan)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [109]  [2400/2502]  eta: 0:00:34  lr: 0.003065  min_lr: 0.003065  loss: 3.7514 (3.5550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6428 (nan)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [109]  [2501/2502]  eta: 0:00:00  lr: 0.003064  min_lr: 0.003064  loss: 3.4950 (3.5521)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6610 (nan)  time: 0.3009  data: 0.0008  max mem: 27255
Epoch: [109] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.003064  min_lr: 0.003064  loss: 3.4950 (3.5583)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6610 (nan)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.6360 (0.6360)  acc1: 91.2000 (91.2000)  acc5: 99.2000 (99.2000)  time: 3.1287  data: 2.9302  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 1.0109 (1.0046)  acc1: 82.4000 (82.0000)  acc5: 96.0000 (96.1455)  time: 0.4967  data: 0.3296  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0288 (1.0291)  acc1: 80.4000 (80.9714)  acc5: 96.0000 (96.4000)  time: 0.2140  data: 0.0496  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1806 (1.1423)  acc1: 74.4000 (78.3226)  acc5: 94.8000 (94.8516)  time: 0.1847  data: 0.0170  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4198 (1.2171)  acc1: 69.6000 (76.3122)  acc5: 90.8000 (93.7659)  time: 0.1716  data: 0.0029  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4016 (1.2331)  acc1: 70.4000 (75.8400)  acc5: 90.8000 (93.5040)  time: 0.1685  data: 0.0028  max mem: 27255
Test: Total time: 0:00:12 (0.2482 s / it)
* Acc@1 75.712 Acc@5 93.576 loss 1.239
Accuracy of the model on the 50000 test images: 75.7%
Max accuracy: 76.12%
Epoch: [110]  [   0/2502]  eta: 1:35:20  lr: 0.003064  min_lr: 0.003064  loss: 2.6012 (2.6012)  weight_decay: 0.0500 (0.0500)  time: 2.2864  data: 1.6932  max mem: 27255
Epoch: [110]  [ 200/2502]  eta: 0:13:12  lr: 0.003063  min_lr: 0.003063  loss: 3.4760 (3.4912)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6898 (0.6629)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [110]  [ 400/2502]  eta: 0:11:52  lr: 0.003061  min_lr: 0.003061  loss: 3.1766 (3.5366)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6442 (0.6804)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [110]  [ 600/2502]  eta: 0:10:42  lr: 0.003060  min_lr: 0.003060  loss: 3.8498 (3.5553)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6352 (0.6815)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [110]  [ 800/2502]  eta: 0:09:33  lr: 0.003058  min_lr: 0.003058  loss: 3.7192 (3.5478)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6737 (0.6755)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [110]  [1000/2502]  eta: 0:08:25  lr: 0.003057  min_lr: 0.003057  loss: 3.6326 (3.5535)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6822 (0.6817)  time: 0.3395  data: 0.0004  max mem: 27255
Epoch: [110]  [1200/2502]  eta: 0:07:18  lr: 0.003055  min_lr: 0.003055  loss: 3.6654 (3.5610)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6270 (0.6713)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [110]  [1400/2502]  eta: 0:06:10  lr: 0.003054  min_lr: 0.003054  loss: 3.5569 (3.5567)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6738 (0.6726)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [110]  [1600/2502]  eta: 0:05:03  lr: 0.003052  min_lr: 0.003052  loss: 3.5337 (3.5576)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5965 (0.6694)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [110]  [1800/2502]  eta: 0:03:55  lr: 0.003051  min_lr: 0.003051  loss: 3.6261 (3.5596)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6398 (0.6702)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [110]  [2000/2502]  eta: 0:02:48  lr: 0.003049  min_lr: 0.003049  loss: 3.6801 (3.5568)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6184 (0.6712)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [110]  [2200/2502]  eta: 0:01:41  lr: 0.003048  min_lr: 0.003048  loss: 3.6985 (3.5632)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7110 (0.6698)  time: 0.3328  data: 0.0003  max mem: 27255
Epoch: [110]  [2400/2502]  eta: 0:00:34  lr: 0.003046  min_lr: 0.003046  loss: 3.6216 (3.5570)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6756 (0.6700)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [110]  [2501/2502]  eta: 0:00:00  lr: 0.003045  min_lr: 0.003045  loss: 3.3550 (3.5589)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6497 (0.6699)  time: 0.3009  data: 0.0009  max mem: 27255
Epoch: [110] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.003045  min_lr: 0.003045  loss: 3.3550 (3.5548)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6497 (0.6699)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.5949 (0.5949)  acc1: 90.8000 (90.8000)  acc5: 98.8000 (98.8000)  time: 2.6407  data: 2.4493  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8987 (0.9437)  acc1: 83.6000 (82.7273)  acc5: 96.0000 (96.1091)  time: 0.4261  data: 0.2598  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9319 (0.9626)  acc1: 80.0000 (81.5048)  acc5: 96.4000 (96.2857)  time: 0.2407  data: 0.0769  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1351 (1.0859)  acc1: 76.8000 (78.9806)  acc5: 93.6000 (94.5161)  time: 0.2563  data: 0.0900  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3839 (1.1675)  acc1: 72.0000 (77.0439)  acc5: 90.0000 (93.5122)  time: 0.2053  data: 0.0392  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3698 (1.1833)  acc1: 70.8000 (76.4000)  acc5: 90.8000 (93.4800)  time: 0.1757  data: 0.0102  max mem: 27255
Test: Total time: 0:00:13 (0.2632 s / it)
* Acc@1 76.006 Acc@5 93.468 loss 1.191
Accuracy of the model on the 50000 test images: 76.0%
Max accuracy: 76.12%
Epoch: [111]  [   0/2502]  eta: 1:26:07  lr: 0.003045  min_lr: 0.003045  loss: 3.8553 (3.8553)  weight_decay: 0.0500 (0.0500)  time: 2.0653  data: 1.7180  max mem: 27255
Epoch: [111]  [ 200/2502]  eta: 0:13:10  lr: 0.003044  min_lr: 0.003044  loss: 3.7722 (3.4962)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6643 (0.6629)  time: 0.3342  data: 0.0003  max mem: 27255
Epoch: [111]  [ 400/2502]  eta: 0:11:51  lr: 0.003042  min_lr: 0.003042  loss: 3.7988 (3.4901)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6777 (0.6564)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [111]  [ 600/2502]  eta: 0:10:42  lr: 0.003041  min_lr: 0.003041  loss: 3.6226 (3.5014)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6555 (0.6678)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [111]  [ 800/2502]  eta: 0:09:33  lr: 0.003039  min_lr: 0.003039  loss: 3.5749 (3.5055)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6585 (0.6710)  time: 0.3342  data: 0.0003  max mem: 27255
Epoch: [111]  [1000/2502]  eta: 0:08:25  lr: 0.003038  min_lr: 0.003038  loss: 3.6579 (3.5033)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6788 (0.6706)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [111]  [1200/2502]  eta: 0:07:17  lr: 0.003036  min_lr: 0.003036  loss: 3.7931 (3.5143)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6979 (0.6746)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [111]  [1400/2502]  eta: 0:06:10  lr: 0.003035  min_lr: 0.003035  loss: 3.3507 (3.5110)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6719 (0.6776)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [111]  [1600/2502]  eta: 0:05:02  lr: 0.003033  min_lr: 0.003033  loss: 3.3755 (3.5162)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6432 (0.6797)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [111]  [1800/2502]  eta: 0:03:55  lr: 0.003031  min_lr: 0.003031  loss: 3.7015 (3.5192)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6465 (0.6782)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [111]  [2000/2502]  eta: 0:02:48  lr: 0.003030  min_lr: 0.003030  loss: 3.7264 (3.5318)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6253 (0.6793)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [111]  [2200/2502]  eta: 0:01:41  lr: 0.003028  min_lr: 0.003028  loss: 3.7517 (3.5398)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6559 (0.6778)  time: 0.3355  data: 0.0005  max mem: 27255
Epoch: [111]  [2400/2502]  eta: 0:00:34  lr: 0.003027  min_lr: 0.003027  loss: 3.7601 (3.5358)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6672 (0.6745)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [111]  [2501/2502]  eta: 0:00:00  lr: 0.003026  min_lr: 0.003026  loss: 3.6005 (3.5399)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6337 (0.6748)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [111] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.003026  min_lr: 0.003026  loss: 3.6005 (3.5543)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6337 (0.6748)
Test:  [ 0/50]  eta: 0:02:18  loss: 0.7078 (0.7078)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 2.7724  data: 2.5831  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.1172 (1.1260)  acc1: 82.0000 (82.2546)  acc5: 96.8000 (96.2909)  time: 0.4504  data: 0.2842  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 1.1172 (1.1286)  acc1: 80.8000 (81.3714)  acc5: 96.8000 (96.4000)  time: 0.2487  data: 0.0849  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2987 (1.2422)  acc1: 76.4000 (78.8258)  acc5: 92.8000 (94.8000)  time: 0.2792  data: 0.1154  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.5261 (1.3128)  acc1: 70.4000 (76.7317)  acc5: 90.4000 (93.8341)  time: 0.2214  data: 0.0578  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5108 (1.3274)  acc1: 70.4000 (75.9520)  acc5: 90.8000 (93.6640)  time: 0.1637  data: 0.0002  max mem: 27255
Test: Total time: 0:00:13 (0.2752 s / it)
* Acc@1 75.752 Acc@5 93.640 loss 1.335
Accuracy of the model on the 50000 test images: 75.8%
Max accuracy: 76.12%
Epoch: [112]  [   0/2502]  eta: 1:20:11  lr: 0.003026  min_lr: 0.003026  loss: 3.5227 (3.5227)  weight_decay: 0.0500 (0.0500)  time: 1.9231  data: 1.4246  max mem: 27255
Epoch: [112]  [ 200/2502]  eta: 0:13:06  lr: 0.003025  min_lr: 0.003025  loss: 3.5820 (3.5077)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7358 (0.6669)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [112]  [ 400/2502]  eta: 0:11:49  lr: 0.003023  min_lr: 0.003023  loss: 3.2330 (3.5144)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6652 (0.6608)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [112]  [ 600/2502]  eta: 0:10:39  lr: 0.003021  min_lr: 0.003021  loss: 3.6753 (3.5360)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6762 (0.6715)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [112]  [ 800/2502]  eta: 0:09:31  lr: 0.003020  min_lr: 0.003020  loss: 3.5748 (3.5465)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6023 (0.6643)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [112]  [1000/2502]  eta: 0:08:24  lr: 0.003018  min_lr: 0.003018  loss: 3.4511 (3.5416)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6703 (0.6711)  time: 0.3423  data: 0.0005  max mem: 27255
Epoch: [112]  [1200/2502]  eta: 0:07:16  lr: 0.003017  min_lr: 0.003017  loss: 3.7766 (3.5443)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6896 (0.6768)  time: 0.3364  data: 0.0005  max mem: 27255
Epoch: [112]  [1400/2502]  eta: 0:06:09  lr: 0.003015  min_lr: 0.003015  loss: 3.9158 (3.5472)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7092 (0.6817)  time: 0.3335  data: 0.0003  max mem: 27255
Epoch: [112]  [1600/2502]  eta: 0:05:02  lr: 0.003014  min_lr: 0.003014  loss: 3.5362 (3.5380)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7262 (0.6809)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [112]  [1800/2502]  eta: 0:03:55  lr: 0.003012  min_lr: 0.003012  loss: 3.4953 (3.5304)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5882 (0.6773)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [112]  [2000/2502]  eta: 0:02:48  lr: 0.003011  min_lr: 0.003011  loss: 3.5418 (3.5284)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6547 (0.6753)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [112]  [2200/2502]  eta: 0:01:41  lr: 0.003009  min_lr: 0.003009  loss: 3.6671 (3.5293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6769 (0.6757)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [112]  [2400/2502]  eta: 0:00:34  lr: 0.003007  min_lr: 0.003007  loss: 3.5230 (3.5298)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6974 (0.6763)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [112]  [2501/2502]  eta: 0:00:00  lr: 0.003007  min_lr: 0.003007  loss: 3.6589 (3.5322)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6721 (0.6773)  time: 0.3003  data: 0.0011  max mem: 27255
Epoch: [112] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.003007  min_lr: 0.003007  loss: 3.6589 (3.5425)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6721 (0.6773)
Test:  [ 0/50]  eta: 0:02:14  loss: 0.8068 (0.8068)  acc1: 90.0000 (90.0000)  acc5: 98.8000 (98.8000)  time: 2.6869  data: 2.4806  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.2147 (1.1910)  acc1: 81.2000 (82.2182)  acc5: 96.0000 (96.1818)  time: 0.4028  data: 0.2349  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1780 (1.1866)  acc1: 79.6000 (80.8000)  acc5: 96.0000 (96.2476)  time: 0.2367  data: 0.0710  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.3154 (1.2944)  acc1: 76.0000 (78.5161)  acc5: 95.2000 (94.7355)  time: 0.2882  data: 0.1226  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 1.5657 (1.3592)  acc1: 71.6000 (76.7220)  acc5: 90.4000 (93.7366)  time: 0.2705  data: 0.1068  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5657 (1.3750)  acc1: 71.6000 (76.0800)  acc5: 91.6000 (93.5520)  time: 0.2137  data: 0.0501  max mem: 27255
Test: Total time: 0:00:14 (0.2881 s / it)
* Acc@1 76.116 Acc@5 93.604 loss 1.379
Accuracy of the model on the 50000 test images: 76.1%
Max accuracy: 76.12%
Epoch: [113]  [   0/2502]  eta: 1:28:15  lr: 0.003007  min_lr: 0.003007  loss: 3.6785 (3.6785)  weight_decay: 0.0500 (0.0500)  time: 2.1164  data: 1.4619  max mem: 27255
Epoch: [113]  [ 200/2502]  eta: 0:13:10  lr: 0.003005  min_lr: 0.003005  loss: 3.4692 (3.5428)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6319 (0.6740)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [113]  [ 400/2502]  eta: 0:11:52  lr: 0.003004  min_lr: 0.003004  loss: 3.8969 (3.5518)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6890 (0.6765)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [113]  [ 600/2502]  eta: 0:10:41  lr: 0.003002  min_lr: 0.003002  loss: 3.5082 (3.5411)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6292 (0.6722)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [113]  [ 800/2502]  eta: 0:09:34  lr: 0.003001  min_lr: 0.003001  loss: 3.8015 (3.5362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6314 (0.6710)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [113]  [1000/2502]  eta: 0:08:25  lr: 0.002999  min_lr: 0.002999  loss: 3.7419 (3.5462)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6656 (0.6791)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [113]  [1200/2502]  eta: 0:07:17  lr: 0.002997  min_lr: 0.002997  loss: 3.7036 (3.5558)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6645 (0.6809)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [113]  [1400/2502]  eta: 0:06:10  lr: 0.002996  min_lr: 0.002996  loss: 3.3928 (3.5531)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6652 (0.6766)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [113]  [1600/2502]  eta: 0:05:02  lr: 0.002994  min_lr: 0.002994  loss: 3.6912 (3.5576)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6534 (0.6755)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [113]  [1800/2502]  eta: 0:03:55  lr: 0.002993  min_lr: 0.002993  loss: 3.3610 (3.5537)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6249 (0.6771)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [113]  [2000/2502]  eta: 0:02:48  lr: 0.002991  min_lr: 0.002991  loss: 3.6828 (3.5528)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6933 (0.6771)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [113]  [2200/2502]  eta: 0:01:41  lr: 0.002990  min_lr: 0.002990  loss: 3.3271 (3.5510)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6838 (0.6798)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [113]  [2400/2502]  eta: 0:00:34  lr: 0.002988  min_lr: 0.002988  loss: 3.4967 (3.5552)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.5904 (0.6772)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [113]  [2501/2502]  eta: 0:00:00  lr: 0.002987  min_lr: 0.002987  loss: 3.5815 (3.5512)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6620 (0.6771)  time: 0.3004  data: 0.0008  max mem: 27255
Epoch: [113] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.002987  min_lr: 0.002987  loss: 3.5815 (3.5500)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6620 (0.6771)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.5878 (0.5878)  acc1: 92.0000 (92.0000)  acc5: 98.8000 (98.8000)  time: 2.6538  data: 2.4592  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9787 (0.9560)  acc1: 82.4000 (82.0364)  acc5: 96.4000 (96.3273)  time: 0.4204  data: 0.2539  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9787 (0.9842)  acc1: 79.6000 (80.8571)  acc5: 96.4000 (96.4000)  time: 0.2169  data: 0.0532  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1861 (1.1084)  acc1: 75.2000 (78.5936)  acc5: 94.0000 (94.7484)  time: 0.2368  data: 0.0730  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4079 (1.1858)  acc1: 72.0000 (76.6732)  acc5: 90.4000 (93.8341)  time: 0.2051  data: 0.0414  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4079 (1.2075)  acc1: 69.6000 (76.0240)  acc5: 91.2000 (93.7280)  time: 0.1720  data: 0.0085  max mem: 27255
Test: Total time: 0:00:12 (0.2548 s / it)
* Acc@1 76.038 Acc@5 93.658 loss 1.209
Accuracy of the model on the 50000 test images: 76.0%
Max accuracy: 76.12%
Epoch: [114]  [   0/2502]  eta: 1:33:11  lr: 0.002987  min_lr: 0.002987  loss: 3.7818 (3.7818)  weight_decay: 0.0500 (0.0500)  time: 2.2348  data: 1.8983  max mem: 27255
Epoch: [114]  [ 200/2502]  eta: 0:13:11  lr: 0.002986  min_lr: 0.002986  loss: 3.7737 (3.5365)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6712 (0.7289)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [114]  [ 400/2502]  eta: 0:11:52  lr: 0.002984  min_lr: 0.002984  loss: 3.4145 (3.5299)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6323 (0.6933)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [114]  [ 600/2502]  eta: 0:10:41  lr: 0.002983  min_lr: 0.002983  loss: 3.7528 (3.5547)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6400 (0.6991)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [114]  [ 800/2502]  eta: 0:09:32  lr: 0.002981  min_lr: 0.002981  loss: 3.2013 (3.5444)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7603 (0.7017)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [114]  [1000/2502]  eta: 0:08:25  lr: 0.002979  min_lr: 0.002979  loss: 3.7033 (3.5539)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6798 (0.6956)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [114]  [1200/2502]  eta: 0:07:17  lr: 0.002978  min_lr: 0.002978  loss: 3.7936 (3.5512)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6232 (0.6878)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [114]  [1400/2502]  eta: 0:06:10  lr: 0.002976  min_lr: 0.002976  loss: 3.8168 (3.5580)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6478 (0.6850)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [114]  [1600/2502]  eta: 0:05:02  lr: 0.002975  min_lr: 0.002975  loss: 3.4147 (3.5479)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6339 (0.6868)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [114]  [1800/2502]  eta: 0:03:55  lr: 0.002973  min_lr: 0.002973  loss: 3.4024 (3.5501)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6418 (0.6870)  time: 0.3446  data: 0.0006  max mem: 27255
Epoch: [114]  [2000/2502]  eta: 0:02:48  lr: 0.002972  min_lr: 0.002972  loss: 3.6687 (3.5440)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6330 (0.6830)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [114]  [2200/2502]  eta: 0:01:41  lr: 0.002970  min_lr: 0.002970  loss: 3.6466 (3.5442)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6673 (0.6836)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [114]  [2400/2502]  eta: 0:00:34  lr: 0.002968  min_lr: 0.002968  loss: 3.3677 (3.5360)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7053 (0.6828)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [114]  [2501/2502]  eta: 0:00:00  lr: 0.002968  min_lr: 0.002968  loss: 3.7327 (3.5368)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6626 (0.6832)  time: 0.3003  data: 0.0008  max mem: 27255
Epoch: [114] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.002968  min_lr: 0.002968  loss: 3.7327 (3.5385)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6626 (0.6832)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.7140 (0.7140)  acc1: 91.2000 (91.2000)  acc5: 98.4000 (98.4000)  time: 3.0100  data: 2.8098  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9986 (1.0377)  acc1: 83.2000 (82.5818)  acc5: 96.4000 (96.3636)  time: 0.4450  data: 0.2776  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0508 (1.0514)  acc1: 81.6000 (82.0000)  acc5: 96.4000 (96.5333)  time: 0.2212  data: 0.0572  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2292 (1.1618)  acc1: 78.0000 (79.6258)  acc5: 94.4000 (94.8774)  time: 0.2513  data: 0.0874  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4877 (1.2484)  acc1: 71.6000 (77.1610)  acc5: 90.8000 (93.9512)  time: 0.2254  data: 0.0616  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4862 (1.2673)  acc1: 71.2000 (76.5440)  acc5: 91.6000 (93.8080)  time: 0.1853  data: 0.0217  max mem: 27255
Test: Total time: 0:00:13 (0.2713 s / it)
* Acc@1 76.220 Acc@5 93.674 loss 1.271
Accuracy of the model on the 50000 test images: 76.2%
Max accuracy: 76.22%
Epoch: [115]  [   0/2502]  eta: 1:29:45  lr: 0.002968  min_lr: 0.002968  loss: 3.6804 (3.6804)  weight_decay: 0.0500 (0.0500)  time: 2.1524  data: 1.8017  max mem: 27255
Epoch: [115]  [ 200/2502]  eta: 0:13:09  lr: 0.002966  min_lr: 0.002966  loss: 3.6384 (3.6002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6472 (0.6748)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [115]  [ 400/2502]  eta: 0:11:53  lr: 0.002965  min_lr: 0.002965  loss: 3.4930 (3.5391)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6788 (0.6817)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [115]  [ 600/2502]  eta: 0:10:42  lr: 0.002963  min_lr: 0.002963  loss: 3.4358 (3.5067)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6393 (0.6793)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [115]  [ 800/2502]  eta: 0:09:34  lr: 0.002961  min_lr: 0.002961  loss: 3.8047 (3.5187)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6909 (0.6867)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [115]  [1000/2502]  eta: 0:08:25  lr: 0.002960  min_lr: 0.002960  loss: 3.4521 (3.5116)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6320 (0.6800)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [115]  [1200/2502]  eta: 0:07:17  lr: 0.002958  min_lr: 0.002958  loss: 3.8034 (3.5082)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6860 (0.6845)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [115]  [1400/2502]  eta: 0:06:10  lr: 0.002957  min_lr: 0.002957  loss: 3.7787 (3.5153)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6914 (0.6849)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [115]  [1600/2502]  eta: 0:05:02  lr: 0.002955  min_lr: 0.002955  loss: 3.3129 (3.5264)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6225 (0.6835)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [115]  [1800/2502]  eta: 0:03:55  lr: 0.002954  min_lr: 0.002954  loss: 3.6230 (3.5271)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6132 (0.6822)  time: 0.3358  data: 0.0005  max mem: 27255
Epoch: [115]  [2000/2502]  eta: 0:02:48  lr: 0.002952  min_lr: 0.002952  loss: 3.5673 (3.5291)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7098 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [115]  [2200/2502]  eta: 0:01:41  lr: 0.002950  min_lr: 0.002950  loss: 3.6933 (3.5297)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6327 (inf)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [115]  [2400/2502]  eta: 0:00:34  lr: 0.002949  min_lr: 0.002949  loss: 3.6574 (3.5303)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6240 (inf)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [115]  [2501/2502]  eta: 0:00:00  lr: 0.002948  min_lr: 0.002948  loss: 3.3523 (3.5274)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6852 (inf)  time: 0.3008  data: 0.0007  max mem: 27255
Epoch: [115] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.002948  min_lr: 0.002948  loss: 3.3523 (3.5347)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6852 (inf)
Test:  [ 0/50]  eta: 0:02:15  loss: 0.6063 (0.6063)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 2.7062  data: 2.5185  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8701 (0.9111)  acc1: 81.2000 (82.1091)  acc5: 96.4000 (96.4364)  time: 0.4361  data: 0.2701  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9178 (0.9250)  acc1: 80.0000 (81.3143)  acc5: 96.4000 (96.4571)  time: 0.2139  data: 0.0501  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0591 (1.0431)  acc1: 75.6000 (79.0839)  acc5: 93.6000 (94.9290)  time: 0.2319  data: 0.0681  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3531 (1.1153)  acc1: 71.6000 (77.2683)  acc5: 91.2000 (94.1073)  time: 0.2080  data: 0.0436  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3531 (1.1319)  acc1: 70.4000 (76.5840)  acc5: 91.6000 (93.9280)  time: 0.1747  data: 0.0104  max mem: 27255
Test: Total time: 0:00:12 (0.2572 s / it)
* Acc@1 76.326 Acc@5 93.804 loss 1.143
Accuracy of the model on the 50000 test images: 76.3%
Max accuracy: 76.33%
Epoch: [116]  [   0/2502]  eta: 1:17:52  lr: 0.002948  min_lr: 0.002948  loss: 3.5331 (3.5331)  weight_decay: 0.0500 (0.0500)  time: 1.8675  data: 1.5268  max mem: 27255
Epoch: [116]  [ 200/2502]  eta: 0:13:11  lr: 0.002946  min_lr: 0.002946  loss: 3.7199 (3.5162)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6828 (0.6902)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [116]  [ 400/2502]  eta: 0:11:55  lr: 0.002945  min_lr: 0.002945  loss: 2.9731 (3.5247)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6443 (0.6894)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [116]  [ 600/2502]  eta: 0:10:43  lr: 0.002943  min_lr: 0.002943  loss: 3.6464 (3.5314)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6419 (0.6904)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [116]  [ 800/2502]  eta: 0:09:34  lr: 0.002942  min_lr: 0.002942  loss: 3.1828 (3.5283)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6665 (0.6869)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [116]  [1000/2502]  eta: 0:08:25  lr: 0.002940  min_lr: 0.002940  loss: 3.7450 (3.5317)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6778 (0.6926)  time: 0.3335  data: 0.0003  max mem: 27255
Epoch: [116]  [1200/2502]  eta: 0:07:17  lr: 0.002939  min_lr: 0.002939  loss: 3.5255 (3.5236)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6368 (0.6922)  time: 0.3347  data: 0.0003  max mem: 27255
Epoch: [116]  [1400/2502]  eta: 0:06:10  lr: 0.002937  min_lr: 0.002937  loss: 3.5616 (3.5209)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6319 (0.6916)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [116]  [1600/2502]  eta: 0:05:02  lr: 0.002935  min_lr: 0.002935  loss: 3.6135 (3.5283)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7339 (0.6958)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [116]  [1800/2502]  eta: 0:03:55  lr: 0.002934  min_lr: 0.002934  loss: 3.5637 (3.5327)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6649 (0.6980)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [116]  [2000/2502]  eta: 0:02:48  lr: 0.002932  min_lr: 0.002932  loss: 3.6636 (3.5426)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6538 (0.6947)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [116]  [2200/2502]  eta: 0:01:41  lr: 0.002931  min_lr: 0.002931  loss: 3.6591 (3.5402)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6743 (0.6952)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [116]  [2400/2502]  eta: 0:00:34  lr: 0.002929  min_lr: 0.002929  loss: 3.7885 (3.5397)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6761 (0.6954)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [116]  [2501/2502]  eta: 0:00:00  lr: 0.002928  min_lr: 0.002928  loss: 3.7350 (3.5424)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6392 (0.6940)  time: 0.3006  data: 0.0007  max mem: 27255
Epoch: [116] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.002928  min_lr: 0.002928  loss: 3.7350 (3.5383)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6392 (0.6940)
Test:  [ 0/50]  eta: 0:02:53  loss: 0.7374 (0.7374)  acc1: 91.6000 (91.6000)  acc5: 98.8000 (98.8000)  time: 3.4678  data: 3.2786  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.0790 (1.1110)  acc1: 82.8000 (82.7273)  acc5: 96.8000 (96.1818)  time: 0.4679  data: 0.3012  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1051 (1.1334)  acc1: 81.2000 (81.2952)  acc5: 96.4000 (96.3048)  time: 0.1937  data: 0.0271  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.3537 (1.2421)  acc1: 76.0000 (78.5936)  acc5: 92.8000 (94.7226)  time: 0.2153  data: 0.0487  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4880 (1.3092)  acc1: 72.0000 (76.8976)  acc5: 90.8000 (93.8342)  time: 0.2009  data: 0.0364  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4872 (1.3183)  acc1: 72.0000 (76.2480)  acc5: 91.6000 (93.6720)  time: 0.1978  data: 0.0334  max mem: 27255
Test: Total time: 0:00:12 (0.2586 s / it)
* Acc@1 76.166 Acc@5 93.588 loss 1.328
Accuracy of the model on the 50000 test images: 76.2%
Max accuracy: 76.33%
Epoch: [117]  [   0/2502]  eta: 1:32:44  lr: 0.002928  min_lr: 0.002928  loss: 3.5462 (3.5462)  weight_decay: 0.0500 (0.0500)  time: 2.2238  data: 1.8686  max mem: 27255
Epoch: [117]  [ 200/2502]  eta: 0:13:10  lr: 0.002927  min_lr: 0.002927  loss: 3.7172 (3.5901)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6637 (0.6706)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [117]  [ 400/2502]  eta: 0:11:51  lr: 0.002925  min_lr: 0.002925  loss: 3.3341 (3.5927)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6531 (0.6776)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [117]  [ 600/2502]  eta: 0:10:40  lr: 0.002923  min_lr: 0.002923  loss: 3.5699 (3.5605)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6299 (0.6715)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [117]  [ 800/2502]  eta: 0:09:32  lr: 0.002922  min_lr: 0.002922  loss: 3.4981 (3.5571)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6440 (0.6718)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [117]  [1000/2502]  eta: 0:08:24  lr: 0.002920  min_lr: 0.002920  loss: 3.6908 (3.5516)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6888 (0.6786)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [117]  [1200/2502]  eta: 0:07:16  lr: 0.002919  min_lr: 0.002919  loss: 3.6360 (3.5539)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6177 (0.6771)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [117]  [1400/2502]  eta: 0:06:09  lr: 0.002917  min_lr: 0.002917  loss: 3.6938 (3.5518)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6276 (0.6797)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [117]  [1600/2502]  eta: 0:05:02  lr: 0.002915  min_lr: 0.002915  loss: 3.5174 (3.5433)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6424 (0.6766)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [117]  [1800/2502]  eta: 0:03:55  lr: 0.002914  min_lr: 0.002914  loss: 3.8945 (3.5478)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6587 (0.6806)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [117]  [2000/2502]  eta: 0:02:48  lr: 0.002912  min_lr: 0.002912  loss: 3.5984 (3.5408)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6641 (0.6788)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [117]  [2200/2502]  eta: 0:01:41  lr: 0.002911  min_lr: 0.002911  loss: 3.5976 (3.5446)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6884 (0.6789)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [117]  [2400/2502]  eta: 0:00:34  lr: 0.002909  min_lr: 0.002909  loss: 3.7687 (3.5449)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7145 (0.6825)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [117]  [2501/2502]  eta: 0:00:00  lr: 0.002908  min_lr: 0.002908  loss: 3.5725 (3.5454)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6172 (0.6816)  time: 0.3005  data: 0.0008  max mem: 27255
Epoch: [117] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.002908  min_lr: 0.002908  loss: 3.5725 (3.5400)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6172 (0.6816)
Test:  [ 0/50]  eta: 0:02:23  loss: 0.7151 (0.7151)  acc1: 92.0000 (92.0000)  acc5: 98.8000 (98.8000)  time: 2.8725  data: 2.6786  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.0503 (1.0519)  acc1: 84.0000 (82.9455)  acc5: 96.8000 (96.5455)  time: 0.4667  data: 0.3001  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0841 (1.1013)  acc1: 80.8000 (81.5048)  acc5: 96.8000 (96.4762)  time: 0.2069  data: 0.0430  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2298 (1.2046)  acc1: 75.2000 (79.0839)  acc5: 92.8000 (94.8129)  time: 0.2010  data: 0.0371  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4894 (1.2719)  acc1: 70.8000 (77.0927)  acc5: 91.2000 (93.9805)  time: 0.2078  data: 0.0440  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4757 (1.2828)  acc1: 70.8000 (76.4800)  acc5: 91.6000 (93.8720)  time: 0.1963  data: 0.0327  max mem: 27255
Test: Total time: 0:00:12 (0.2545 s / it)
* Acc@1 76.302 Acc@5 93.710 loss 1.298
Accuracy of the model on the 50000 test images: 76.3%
Max accuracy: 76.33%
Epoch: [118]  [   0/2502]  eta: 1:31:22  lr: 0.002908  min_lr: 0.002908  loss: 2.9449 (2.9449)  weight_decay: 0.0500 (0.0500)  time: 2.1912  data: 1.7230  max mem: 27255
Epoch: [118]  [ 200/2502]  eta: 0:13:14  lr: 0.002907  min_lr: 0.002907  loss: 3.0109 (3.4836)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6706 (0.7002)  time: 0.3415  data: 0.0003  max mem: 27255
Epoch: [118]  [ 400/2502]  eta: 0:11:53  lr: 0.002905  min_lr: 0.002905  loss: 3.2219 (3.4907)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6319 (0.6869)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [118]  [ 600/2502]  eta: 0:10:42  lr: 0.002903  min_lr: 0.002903  loss: 3.6485 (3.4990)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6328 (0.6882)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [118]  [ 800/2502]  eta: 0:09:33  lr: 0.002902  min_lr: 0.002902  loss: 3.5921 (3.4976)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6417 (0.6903)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [118]  [1000/2502]  eta: 0:08:25  lr: 0.002900  min_lr: 0.002900  loss: 3.6252 (3.4996)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6690 (0.6882)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [118]  [1200/2502]  eta: 0:07:17  lr: 0.002899  min_lr: 0.002899  loss: 3.7614 (3.5021)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6468 (0.6899)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [118]  [1400/2502]  eta: 0:06:09  lr: 0.002897  min_lr: 0.002897  loss: 3.7004 (3.5061)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7851 (0.6940)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [118]  [1600/2502]  eta: 0:05:02  lr: 0.002895  min_lr: 0.002895  loss: 3.4407 (3.5066)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6784 (0.6929)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [118]  [1800/2502]  eta: 0:03:55  lr: 0.002894  min_lr: 0.002894  loss: 3.7165 (3.5091)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6261 (0.6909)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [118]  [2000/2502]  eta: 0:02:48  lr: 0.002892  min_lr: 0.002892  loss: 3.6030 (3.5121)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6711 (0.6895)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [118]  [2200/2502]  eta: 0:01:41  lr: 0.002891  min_lr: 0.002891  loss: 3.5085 (3.5157)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6543 (0.6882)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [118]  [2400/2502]  eta: 0:00:34  lr: 0.002889  min_lr: 0.002889  loss: 3.5561 (3.5169)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6443 (0.6879)  time: 0.3342  data: 0.0003  max mem: 27255
Epoch: [118]  [2501/2502]  eta: 0:00:00  lr: 0.002888  min_lr: 0.002888  loss: 3.5438 (3.5168)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6785 (0.6878)  time: 0.3014  data: 0.0007  max mem: 27255
Epoch: [118] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.002888  min_lr: 0.002888  loss: 3.5438 (3.5289)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6785 (0.6878)
Test:  [ 0/50]  eta: 0:02:18  loss: 0.6404 (0.6404)  acc1: 91.6000 (91.6000)  acc5: 98.8000 (98.8000)  time: 2.7664  data: 2.5650  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9204 (0.9683)  acc1: 82.0000 (81.8545)  acc5: 96.4000 (96.1455)  time: 0.4380  data: 0.2707  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9632 (0.9828)  acc1: 80.8000 (80.9143)  acc5: 96.4000 (96.3810)  time: 0.1901  data: 0.0263  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0883 (1.0781)  acc1: 76.8000 (78.8516)  acc5: 94.4000 (94.8258)  time: 0.1778  data: 0.0140  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3522 (1.1543)  acc1: 71.6000 (76.8098)  acc5: 90.8000 (93.9415)  time: 0.1858  data: 0.0221  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3522 (1.1735)  acc1: 71.2000 (76.3120)  acc5: 91.6000 (93.8320)  time: 0.1773  data: 0.0138  max mem: 27255
Test: Total time: 0:00:11 (0.2369 s / it)
* Acc@1 76.342 Acc@5 93.736 loss 1.179
Accuracy of the model on the 50000 test images: 76.3%
Max accuracy: 76.34%
Epoch: [119]  [   0/2502]  eta: 1:30:16  lr: 0.002888  min_lr: 0.002888  loss: 3.1770 (3.1770)  weight_decay: 0.0500 (0.0500)  time: 2.1649  data: 1.8291  max mem: 27255
Epoch: [119]  [ 200/2502]  eta: 0:13:16  lr: 0.002887  min_lr: 0.002887  loss: 3.6547 (3.4793)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6549 (inf)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [119]  [ 400/2502]  eta: 0:11:54  lr: 0.002885  min_lr: 0.002885  loss: 3.7522 (3.5250)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6638 (inf)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [119]  [ 600/2502]  eta: 0:10:43  lr: 0.002883  min_lr: 0.002883  loss: 3.7744 (3.5368)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6888 (inf)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [119]  [ 800/2502]  eta: 0:09:33  lr: 0.002882  min_lr: 0.002882  loss: 3.6837 (3.5254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6586 (inf)  time: 0.3351  data: 0.0003  max mem: 27255
Epoch: [119]  [1000/2502]  eta: 0:08:25  lr: 0.002880  min_lr: 0.002880  loss: 3.6017 (3.5214)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6979 (inf)  time: 0.3341  data: 0.0003  max mem: 27255
Epoch: [119]  [1200/2502]  eta: 0:07:17  lr: 0.002879  min_lr: 0.002879  loss: 3.7866 (3.5261)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6548 (inf)  time: 0.3335  data: 0.0003  max mem: 27255
Epoch: [119]  [1400/2502]  eta: 0:06:10  lr: 0.002877  min_lr: 0.002877  loss: 3.7322 (3.5273)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6861 (inf)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [119]  [1600/2502]  eta: 0:05:02  lr: 0.002875  min_lr: 0.002875  loss: 3.6443 (3.5209)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7256 (inf)  time: 0.3346  data: 0.0003  max mem: 27255
Epoch: [119]  [1800/2502]  eta: 0:03:55  lr: 0.002874  min_lr: 0.002874  loss: 3.6101 (3.5294)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6001 (inf)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [119]  [2000/2502]  eta: 0:02:48  lr: 0.002872  min_lr: 0.002872  loss: 3.6063 (3.5315)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6752 (inf)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [119]  [2200/2502]  eta: 0:01:41  lr: 0.002870  min_lr: 0.002870  loss: 3.4462 (3.5310)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6585 (inf)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [119]  [2400/2502]  eta: 0:00:34  lr: 0.002869  min_lr: 0.002869  loss: 3.2333 (3.5309)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6299 (inf)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [119]  [2501/2502]  eta: 0:00:00  lr: 0.002868  min_lr: 0.002868  loss: 3.8320 (3.5270)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6429 (inf)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [119] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.002868  min_lr: 0.002868  loss: 3.8320 (3.5273)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6429 (inf)
Test:  [ 0/50]  eta: 0:02:15  loss: 0.7175 (0.7175)  acc1: 89.6000 (89.6000)  acc5: 98.8000 (98.8000)  time: 2.7153  data: 2.5263  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.0856 (1.0896)  acc1: 84.0000 (82.3273)  acc5: 95.6000 (95.7455)  time: 0.4293  data: 0.2632  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.1034 (1.1028)  acc1: 80.4000 (81.0667)  acc5: 95.6000 (96.0381)  time: 0.2405  data: 0.0766  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2446 (1.2247)  acc1: 75.6000 (78.9419)  acc5: 94.0000 (94.5032)  time: 0.2838  data: 0.1201  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 1.5522 (1.3041)  acc1: 72.0000 (76.9951)  acc5: 91.2000 (93.6683)  time: 0.2401  data: 0.0745  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.5324 (1.3215)  acc1: 71.2000 (76.3280)  acc5: 91.6000 (93.5120)  time: 0.1783  data: 0.0127  max mem: 27255
Test: Total time: 0:00:13 (0.2778 s / it)
* Acc@1 76.250 Acc@5 93.602 loss 1.324
Accuracy of the model on the 50000 test images: 76.3%
Max accuracy: 76.34%
Epoch: [120]  [   0/2502]  eta: 1:32:38  lr: 0.002868  min_lr: 0.002868  loss: 3.6122 (3.6122)  weight_decay: 0.0500 (0.0500)  time: 2.2216  data: 1.6213  max mem: 27255
Epoch: [120]  [ 200/2502]  eta: 0:13:11  lr: 0.002866  min_lr: 0.002866  loss: 3.6850 (3.5217)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7020 (0.6567)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [120]  [ 400/2502]  eta: 0:11:52  lr: 0.002865  min_lr: 0.002865  loss: 3.4802 (3.5420)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6710 (0.6715)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [120]  [ 600/2502]  eta: 0:10:41  lr: 0.002863  min_lr: 0.002863  loss: 3.5052 (3.5286)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7167 (0.6750)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [120]  [ 800/2502]  eta: 0:09:32  lr: 0.002862  min_lr: 0.002862  loss: 3.7601 (3.5336)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6688 (0.6780)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [120]  [1000/2502]  eta: 0:08:24  lr: 0.002860  min_lr: 0.002860  loss: 3.6615 (3.5279)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6973 (0.6829)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [120]  [1200/2502]  eta: 0:07:17  lr: 0.002858  min_lr: 0.002858  loss: 3.4606 (3.5279)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6893 (0.6900)  time: 0.3356  data: 0.0003  max mem: 27255
Epoch: [120]  [1400/2502]  eta: 0:06:10  lr: 0.002857  min_lr: 0.002857  loss: 3.6941 (3.5242)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6301 (0.6885)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [120]  [1600/2502]  eta: 0:05:02  lr: 0.002855  min_lr: 0.002855  loss: 3.7162 (3.5268)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6548 (0.6881)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [120]  [1800/2502]  eta: 0:03:55  lr: 0.002853  min_lr: 0.002853  loss: 3.6627 (3.5281)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6221 (0.6869)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [120]  [2000/2502]  eta: 0:02:48  lr: 0.002852  min_lr: 0.002852  loss: 3.5553 (3.5295)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7121 (0.6880)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [120]  [2200/2502]  eta: 0:01:41  lr: 0.002850  min_lr: 0.002850  loss: 3.7653 (3.5264)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6355 (0.6886)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [120]  [2400/2502]  eta: 0:00:34  lr: 0.002849  min_lr: 0.002849  loss: 3.6267 (3.5297)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6683 (0.6880)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [120]  [2501/2502]  eta: 0:00:00  lr: 0.002848  min_lr: 0.002848  loss: 3.4511 (3.5290)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6485 (0.6882)  time: 0.3009  data: 0.0009  max mem: 27255
Epoch: [120] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.002848  min_lr: 0.002848  loss: 3.4511 (3.5231)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6485 (0.6882)
Test:  [ 0/50]  eta: 0:02:37  loss: 0.5823 (0.5823)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 3.1515  data: 2.9500  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.8947 (0.9228)  acc1: 80.0000 (81.6000)  acc5: 96.4000 (96.0364)  time: 0.4558  data: 0.2884  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.9547 (0.9378)  acc1: 80.0000 (81.2381)  acc5: 96.0000 (96.2476)  time: 0.2291  data: 0.0646  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1099 (1.0566)  acc1: 76.8000 (79.0710)  acc5: 94.4000 (94.7226)  time: 0.2579  data: 0.0898  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3567 (1.1384)  acc1: 72.4000 (77.0439)  acc5: 90.8000 (93.9024)  time: 0.2051  data: 0.0377  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3567 (1.1595)  acc1: 70.8000 (76.5360)  acc5: 92.4000 (93.8320)  time: 0.2011  data: 0.0376  max mem: 27255
Test: Total time: 0:00:13 (0.2683 s / it)
* Acc@1 76.572 Acc@5 93.836 loss 1.162
Accuracy of the model on the 50000 test images: 76.6%
Max accuracy: 76.57%
Epoch: [121]  [   0/2502]  eta: 1:21:35  lr: 0.002848  min_lr: 0.002848  loss: 3.6640 (3.6640)  weight_decay: 0.0500 (0.0500)  time: 1.9567  data: 1.5971  max mem: 27255
Epoch: [121]  [ 200/2502]  eta: 0:13:08  lr: 0.002846  min_lr: 0.002846  loss: 3.7463 (3.5190)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7604 (0.7550)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [121]  [ 400/2502]  eta: 0:11:51  lr: 0.002845  min_lr: 0.002845  loss: 3.7314 (3.5207)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6432 (0.7266)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [121]  [ 600/2502]  eta: 0:10:41  lr: 0.002843  min_lr: 0.002843  loss: 3.6363 (3.5107)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7134 (0.7178)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [121]  [ 800/2502]  eta: 0:09:32  lr: 0.002841  min_lr: 0.002841  loss: 3.5165 (3.5169)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6723 (0.7054)  time: 0.3362  data: 0.0005  max mem: 27255
Epoch: [121]  [1000/2502]  eta: 0:08:24  lr: 0.002840  min_lr: 0.002840  loss: 3.7968 (3.5180)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6643 (0.6984)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [121]  [1200/2502]  eta: 0:07:17  lr: 0.002838  min_lr: 0.002838  loss: 3.6510 (3.5106)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6942 (0.6927)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [121]  [1400/2502]  eta: 0:06:10  lr: 0.002836  min_lr: 0.002836  loss: 3.3689 (3.4999)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6555 (0.6927)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [121]  [1600/2502]  eta: 0:05:02  lr: 0.002835  min_lr: 0.002835  loss: 3.7941 (3.5044)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6999 (0.6929)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [121]  [1800/2502]  eta: 0:03:55  lr: 0.002833  min_lr: 0.002833  loss: 3.7513 (3.5063)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6658 (0.6952)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [121]  [2000/2502]  eta: 0:02:48  lr: 0.002831  min_lr: 0.002831  loss: 3.8366 (3.5108)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6740 (0.6965)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [121]  [2200/2502]  eta: 0:01:41  lr: 0.002830  min_lr: 0.002830  loss: 3.2731 (3.5121)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6885 (0.6938)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [121]  [2400/2502]  eta: 0:00:34  lr: 0.002828  min_lr: 0.002828  loss: 3.6564 (3.5109)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6790 (0.6940)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [121]  [2501/2502]  eta: 0:00:00  lr: 0.002827  min_lr: 0.002827  loss: 3.6514 (3.5151)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6497 (0.6934)  time: 0.3007  data: 0.0009  max mem: 27255
Epoch: [121] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.002827  min_lr: 0.002827  loss: 3.6514 (3.5117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6497 (0.6934)
Test:  [ 0/50]  eta: 0:02:55  loss: 0.6966 (0.6966)  acc1: 92.0000 (92.0000)  acc5: 99.2000 (99.2000)  time: 3.5070  data: 3.3173  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 1.0058 (1.0265)  acc1: 82.0000 (81.6727)  acc5: 97.2000 (96.5091)  time: 0.4697  data: 0.3034  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0058 (1.0210)  acc1: 80.4000 (81.2571)  acc5: 96.4000 (96.6476)  time: 0.1679  data: 0.0039  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1893 (1.1357)  acc1: 77.6000 (79.2645)  acc5: 93.6000 (94.9936)  time: 0.1824  data: 0.0183  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4098 (1.2080)  acc1: 72.4000 (77.3561)  acc5: 91.2000 (94.1561)  time: 0.2110  data: 0.0470  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4098 (1.2238)  acc1: 71.6000 (76.8000)  acc5: 92.0000 (93.9840)  time: 0.2105  data: 0.0468  max mem: 27255
Test: Total time: 0:00:12 (0.2532 s / it)
* Acc@1 76.390 Acc@5 93.752 loss 1.229
Accuracy of the model on the 50000 test images: 76.4%
Max accuracy: 76.57%
Epoch: [122]  [   0/2502]  eta: 1:29:04  lr: 0.002827  min_lr: 0.002827  loss: 3.5259 (3.5259)  weight_decay: 0.0500 (0.0500)  time: 2.1361  data: 1.7281  max mem: 27255
Epoch: [122]  [ 200/2502]  eta: 0:13:10  lr: 0.002826  min_lr: 0.002826  loss: 3.2778 (3.4358)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6849 (0.6950)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [122]  [ 400/2502]  eta: 0:11:52  lr: 0.002824  min_lr: 0.002824  loss: 3.7690 (3.4918)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6731 (0.6952)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [122]  [ 600/2502]  eta: 0:10:41  lr: 0.002823  min_lr: 0.002823  loss: 3.7417 (3.4975)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6773 (0.6853)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [122]  [ 800/2502]  eta: 0:09:32  lr: 0.002821  min_lr: 0.002821  loss: 3.5217 (3.4958)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6291 (0.6907)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [122]  [1000/2502]  eta: 0:08:24  lr: 0.002819  min_lr: 0.002819  loss: 3.3235 (3.4870)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6796 (0.6934)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [122]  [1200/2502]  eta: 0:07:16  lr: 0.002818  min_lr: 0.002818  loss: 3.6471 (3.5006)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6763 (0.6915)  time: 0.3367  data: 0.0004  max mem: 27255
Epoch: [122]  [1400/2502]  eta: 0:06:09  lr: 0.002816  min_lr: 0.002816  loss: 3.6116 (3.5023)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6458 (0.6976)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [122]  [1600/2502]  eta: 0:05:02  lr: 0.002814  min_lr: 0.002814  loss: 3.7938 (3.5101)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6534 (0.6931)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [122]  [1800/2502]  eta: 0:03:55  lr: 0.002813  min_lr: 0.002813  loss: 3.6636 (3.5050)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6952 (0.6968)  time: 0.3360  data: 0.0005  max mem: 27255
Epoch: [122]  [2000/2502]  eta: 0:02:48  lr: 0.002811  min_lr: 0.002811  loss: 3.4373 (3.5041)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7932 (0.6991)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [122]  [2200/2502]  eta: 0:01:41  lr: 0.002809  min_lr: 0.002809  loss: 3.9103 (3.5114)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6883 (0.6983)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [122]  [2400/2502]  eta: 0:00:34  lr: 0.002808  min_lr: 0.002808  loss: 3.6146 (3.5103)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6855 (0.6975)  time: 0.3358  data: 0.0005  max mem: 27255
Epoch: [122]  [2501/2502]  eta: 0:00:00  lr: 0.002807  min_lr: 0.002807  loss: 3.5395 (3.5081)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6817 (0.6979)  time: 0.3008  data: 0.0007  max mem: 27255
Epoch: [122] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.002807  min_lr: 0.002807  loss: 3.5395 (3.5071)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6817 (0.6979)
Test:  [ 0/50]  eta: 0:02:08  loss: 0.5454 (0.5454)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 2.5786  data: 2.3768  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9077 (0.9407)  acc1: 83.2000 (82.3273)  acc5: 96.0000 (96.3273)  time: 0.4192  data: 0.2519  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9156 (0.9571)  acc1: 80.0000 (81.1429)  acc5: 96.8000 (96.5333)  time: 0.1923  data: 0.0283  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1251 (1.0780)  acc1: 76.8000 (78.8387)  acc5: 93.6000 (94.8258)  time: 0.1930  data: 0.0291  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3870 (1.1530)  acc1: 72.0000 (77.1122)  acc5: 90.8000 (93.9512)  time: 0.1935  data: 0.0298  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3443 (1.1658)  acc1: 72.0000 (76.6400)  acc5: 91.6000 (93.8720)  time: 0.1796  data: 0.0160  max mem: 27255
Test: Total time: 0:00:11 (0.2388 s / it)
* Acc@1 76.584 Acc@5 93.728 loss 1.170
Accuracy of the model on the 50000 test images: 76.6%
Max accuracy: 76.58%
Epoch: [123]  [   0/2502]  eta: 1:25:02  lr: 0.002807  min_lr: 0.002807  loss: 3.7559 (3.7559)  weight_decay: 0.0500 (0.0500)  time: 2.0393  data: 1.6901  max mem: 27255
Epoch: [123]  [ 200/2502]  eta: 0:13:09  lr: 0.002805  min_lr: 0.002805  loss: 3.4881 (3.5103)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6679 (inf)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [123]  [ 400/2502]  eta: 0:11:52  lr: 0.002804  min_lr: 0.002804  loss: 3.5400 (3.4957)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6855 (inf)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [123]  [ 600/2502]  eta: 0:10:41  lr: 0.002802  min_lr: 0.002802  loss: 3.5924 (3.5034)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6628 (inf)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [123]  [ 800/2502]  eta: 0:09:33  lr: 0.002800  min_lr: 0.002800  loss: 3.3236 (3.5202)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7215 (inf)  time: 0.3445  data: 0.0007  max mem: 27255
Epoch: [123]  [1000/2502]  eta: 0:08:25  lr: 0.002799  min_lr: 0.002799  loss: 3.7565 (3.4996)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6895 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [123]  [1200/2502]  eta: 0:07:17  lr: 0.002797  min_lr: 0.002797  loss: 3.5194 (3.5054)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6304 (inf)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [123]  [1400/2502]  eta: 0:06:10  lr: 0.002795  min_lr: 0.002795  loss: 3.4946 (3.5027)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6540 (inf)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [123]  [1600/2502]  eta: 0:05:02  lr: 0.002794  min_lr: 0.002794  loss: 3.3559 (3.5029)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6846 (inf)  time: 0.3391  data: 0.0005  max mem: 27255
Epoch: [123]  [1800/2502]  eta: 0:03:55  lr: 0.002792  min_lr: 0.002792  loss: 3.6891 (3.5042)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7060 (inf)  time: 0.3408  data: 0.0004  max mem: 27255
Epoch: [123]  [2000/2502]  eta: 0:02:48  lr: 0.002790  min_lr: 0.002790  loss: 3.8365 (3.5048)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6660 (inf)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [123]  [2200/2502]  eta: 0:01:41  lr: 0.002789  min_lr: 0.002789  loss: 3.5302 (3.5083)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6063 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [123]  [2400/2502]  eta: 0:00:34  lr: 0.002787  min_lr: 0.002787  loss: 3.4680 (3.5179)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6839 (inf)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [123]  [2501/2502]  eta: 0:00:00  lr: 0.002786  min_lr: 0.002786  loss: 3.4641 (3.5178)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7358 (inf)  time: 0.3004  data: 0.0009  max mem: 27255
Epoch: [123] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.002786  min_lr: 0.002786  loss: 3.4641 (3.5117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7358 (inf)
Test:  [ 0/50]  eta: 0:02:18  loss: 0.6341 (0.6341)  acc1: 91.6000 (91.6000)  acc5: 98.4000 (98.4000)  time: 2.7722  data: 2.5760  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8677 (0.9351)  acc1: 82.0000 (82.6182)  acc5: 96.8000 (96.6909)  time: 0.4272  data: 0.2604  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9070 (0.9447)  acc1: 80.8000 (81.8095)  acc5: 96.8000 (96.8381)  time: 0.2018  data: 0.0373  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0575 (1.0637)  acc1: 76.8000 (79.5355)  acc5: 94.4000 (94.9548)  time: 0.2222  data: 0.0578  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3729 (1.1353)  acc1: 73.2000 (77.7171)  acc5: 90.4000 (94.1463)  time: 0.2410  data: 0.0773  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3575 (1.1498)  acc1: 72.0000 (77.0480)  acc5: 91.6000 (93.9520)  time: 0.2078  data: 0.0442  max mem: 27255
Test: Total time: 0:00:13 (0.2650 s / it)
* Acc@1 76.776 Acc@5 93.924 loss 1.157
Accuracy of the model on the 50000 test images: 76.8%
Max accuracy: 76.78%
Epoch: [124]  [   0/2502]  eta: 1:22:34  lr: 0.002786  min_lr: 0.002786  loss: 3.5575 (3.5575)  weight_decay: 0.0500 (0.0500)  time: 1.9802  data: 1.6383  max mem: 27255
Epoch: [124]  [ 200/2502]  eta: 0:13:09  lr: 0.002785  min_lr: 0.002785  loss: 3.5023 (3.4483)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6859 (0.7092)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [124]  [ 400/2502]  eta: 0:11:51  lr: 0.002783  min_lr: 0.002783  loss: 3.6379 (3.4651)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6271 (0.7041)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [124]  [ 600/2502]  eta: 0:10:40  lr: 0.002781  min_lr: 0.002781  loss: 3.4299 (3.4848)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6866 (0.7155)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [124]  [ 800/2502]  eta: 0:09:32  lr: 0.002780  min_lr: 0.002780  loss: 3.6551 (3.5023)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6662 (0.7062)  time: 0.3420  data: 0.0004  max mem: 27255
Epoch: [124]  [1000/2502]  eta: 0:08:24  lr: 0.002778  min_lr: 0.002778  loss: 3.2155 (3.4947)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7047 (0.7059)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [124]  [1200/2502]  eta: 0:07:16  lr: 0.002776  min_lr: 0.002776  loss: 3.5024 (3.5027)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6554 (0.7025)  time: 0.3365  data: 0.0005  max mem: 27255
Epoch: [124]  [1400/2502]  eta: 0:06:09  lr: 0.002775  min_lr: 0.002775  loss: 3.3644 (3.5212)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6497 (0.6966)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [124]  [1600/2502]  eta: 0:05:02  lr: 0.002773  min_lr: 0.002773  loss: 3.4467 (3.5082)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7145 (0.6969)  time: 0.3408  data: 0.0005  max mem: 27255
Epoch: [124]  [1800/2502]  eta: 0:03:55  lr: 0.002771  min_lr: 0.002771  loss: 3.5110 (3.5063)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6816 (0.6979)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [124]  [2000/2502]  eta: 0:02:48  lr: 0.002770  min_lr: 0.002770  loss: 3.6831 (3.5007)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7088 (0.6951)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [124]  [2200/2502]  eta: 0:01:41  lr: 0.002768  min_lr: 0.002768  loss: 3.7609 (3.5086)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6984 (0.6947)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [124]  [2400/2502]  eta: 0:00:34  lr: 0.002767  min_lr: 0.002767  loss: 3.5090 (3.5124)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7144 (0.6955)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [124]  [2501/2502]  eta: 0:00:00  lr: 0.002766  min_lr: 0.002766  loss: 3.6336 (3.5105)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6910 (0.6966)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [124] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.002766  min_lr: 0.002766  loss: 3.6336 (3.5114)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6910 (0.6966)
Test:  [ 0/50]  eta: 0:02:22  loss: 0.5237 (0.5237)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 2.8455  data: 2.6450  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8919 (0.9070)  acc1: 83.6000 (82.7273)  acc5: 97.2000 (96.7636)  time: 0.4160  data: 0.2486  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8919 (0.9227)  acc1: 81.6000 (82.1143)  acc5: 96.4000 (96.6667)  time: 0.1850  data: 0.0209  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0954 (1.0442)  acc1: 77.6000 (79.5871)  acc5: 93.6000 (95.0323)  time: 0.1963  data: 0.0323  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3483 (1.1183)  acc1: 72.4000 (77.9512)  acc5: 91.2000 (94.2732)  time: 0.2117  data: 0.0479  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3464 (1.1382)  acc1: 73.6000 (77.3360)  acc5: 91.6000 (94.0080)  time: 0.1994  data: 0.0358  max mem: 27255
Test: Total time: 0:00:12 (0.2489 s / it)
* Acc@1 77.016 Acc@5 93.928 loss 1.147
Accuracy of the model on the 50000 test images: 77.0%
Max accuracy: 77.02%
Epoch: [125]  [   0/2502]  eta: 1:22:00  lr: 0.002766  min_lr: 0.002766  loss: 3.7204 (3.7204)  weight_decay: 0.0500 (0.0500)  time: 1.9668  data: 1.6173  max mem: 27255
Epoch: [125]  [ 200/2502]  eta: 0:13:08  lr: 0.002764  min_lr: 0.002764  loss: 3.6531 (3.4903)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7424 (0.7014)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [125]  [ 400/2502]  eta: 0:11:51  lr: 0.002762  min_lr: 0.002762  loss: 3.5197 (3.4771)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6791 (0.7101)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [125]  [ 600/2502]  eta: 0:10:40  lr: 0.002761  min_lr: 0.002761  loss: 3.5577 (3.4756)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6592 (0.7067)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [125]  [ 800/2502]  eta: 0:09:32  lr: 0.002759  min_lr: 0.002759  loss: 3.7705 (3.4848)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6458 (0.6990)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [125]  [1000/2502]  eta: 0:08:24  lr: 0.002757  min_lr: 0.002757  loss: 3.3411 (3.4927)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7710 (0.7041)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [125]  [1200/2502]  eta: 0:07:17  lr: 0.002756  min_lr: 0.002756  loss: 3.6225 (3.5002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6616 (0.7056)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [125]  [1400/2502]  eta: 0:06:09  lr: 0.002754  min_lr: 0.002754  loss: 3.4690 (3.4962)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6926 (0.6989)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [125]  [1600/2502]  eta: 0:05:02  lr: 0.002752  min_lr: 0.002752  loss: 3.3497 (3.4906)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6820 (0.6972)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [125]  [1800/2502]  eta: 0:03:55  lr: 0.002751  min_lr: 0.002751  loss: 3.4255 (3.4943)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7327 (0.6990)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [125]  [2000/2502]  eta: 0:02:48  lr: 0.002749  min_lr: 0.002749  loss: 3.4116 (3.4938)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6513 (0.6997)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [125]  [2200/2502]  eta: 0:01:41  lr: 0.002747  min_lr: 0.002747  loss: 3.5359 (3.4919)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6696 (0.6980)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [125]  [2400/2502]  eta: 0:00:34  lr: 0.002746  min_lr: 0.002746  loss: 3.7668 (3.4953)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6723 (0.6986)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [125]  [2501/2502]  eta: 0:00:00  lr: 0.002745  min_lr: 0.002745  loss: 3.4977 (3.4934)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6615 (0.6986)  time: 0.3004  data: 0.0009  max mem: 27255
Epoch: [125] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.002745  min_lr: 0.002745  loss: 3.4977 (3.5017)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6615 (0.6986)
Test:  [ 0/50]  eta: 0:02:37  loss: 0.5782 (0.5782)  acc1: 91.2000 (91.2000)  acc5: 98.8000 (98.8000)  time: 3.1432  data: 2.9539  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9068 (0.9677)  acc1: 84.0000 (82.8364)  acc5: 97.6000 (96.7636)  time: 0.4745  data: 0.3082  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9435 (0.9808)  acc1: 80.8000 (81.8286)  acc5: 96.4000 (96.5905)  time: 0.2003  data: 0.0363  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0634 (1.0870)  acc1: 76.8000 (79.5871)  acc5: 93.2000 (94.8645)  time: 0.1889  data: 0.0249  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3592 (1.1569)  acc1: 73.2000 (77.6488)  acc5: 90.8000 (94.0585)  time: 0.1869  data: 0.0223  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3297 (1.1661)  acc1: 72.8000 (77.1040)  acc5: 91.6000 (93.9920)  time: 0.1974  data: 0.0308  max mem: 27255
Test: Total time: 0:00:12 (0.2532 s / it)
* Acc@1 76.716 Acc@5 93.886 loss 1.174
Accuracy of the model on the 50000 test images: 76.7%
Max accuracy: 77.02%
Epoch: [126]  [   0/2502]  eta: 1:28:39  lr: 0.002745  min_lr: 0.002745  loss: 2.3629 (2.3629)  weight_decay: 0.0500 (0.0500)  time: 2.1259  data: 1.7793  max mem: 27255
Epoch: [126]  [ 200/2502]  eta: 0:13:13  lr: 0.002743  min_lr: 0.002743  loss: 3.6095 (3.4351)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6575 (0.7035)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [126]  [ 400/2502]  eta: 0:11:54  lr: 0.002742  min_lr: 0.002742  loss: 3.3571 (3.4281)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6719 (0.7010)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [126]  [ 600/2502]  eta: 0:10:44  lr: 0.002740  min_lr: 0.002740  loss: 3.7307 (3.4616)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7121 (0.6924)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [126]  [ 800/2502]  eta: 0:09:34  lr: 0.002738  min_lr: 0.002738  loss: 3.2996 (3.4625)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [126]  [1000/2502]  eta: 0:08:26  lr: 0.002737  min_lr: 0.002737  loss: 3.6643 (3.4677)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6986 (nan)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [126]  [1200/2502]  eta: 0:07:18  lr: 0.002735  min_lr: 0.002735  loss: 3.5974 (3.4813)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6688 (nan)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [126]  [1400/2502]  eta: 0:06:10  lr: 0.002733  min_lr: 0.002733  loss: 3.6381 (3.4905)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7060 (nan)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [126]  [1600/2502]  eta: 0:05:03  lr: 0.002732  min_lr: 0.002732  loss: 3.4922 (3.4849)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7022 (nan)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [126]  [1800/2502]  eta: 0:03:55  lr: 0.002730  min_lr: 0.002730  loss: 3.5707 (3.4891)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6374 (nan)  time: 0.3356  data: 0.0005  max mem: 27255
Epoch: [126]  [2000/2502]  eta: 0:02:48  lr: 0.002728  min_lr: 0.002728  loss: 3.5843 (3.4945)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6564 (nan)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [126]  [2200/2502]  eta: 0:01:41  lr: 0.002727  min_lr: 0.002727  loss: 3.8298 (3.5024)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6882 (nan)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [126]  [2400/2502]  eta: 0:00:34  lr: 0.002725  min_lr: 0.002725  loss: 3.6135 (3.4995)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7305 (nan)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [126]  [2501/2502]  eta: 0:00:00  lr: 0.002724  min_lr: 0.002724  loss: 3.5896 (3.4989)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6911 (nan)  time: 0.3019  data: 0.0009  max mem: 27255
Epoch: [126] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.002724  min_lr: 0.002724  loss: 3.5896 (3.4969)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6911 (nan)
Test:  [ 0/50]  eta: 0:02:16  loss: 0.7237 (0.7237)  acc1: 92.0000 (92.0000)  acc5: 98.8000 (98.8000)  time: 2.7212  data: 2.5154  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.0009 (1.0518)  acc1: 82.8000 (83.2727)  acc5: 96.4000 (96.0364)  time: 0.4472  data: 0.2794  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0353 (1.0646)  acc1: 81.2000 (82.3810)  acc5: 96.4000 (96.1905)  time: 0.2267  data: 0.0627  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2324 (1.1843)  acc1: 78.8000 (79.9097)  acc5: 92.8000 (94.6839)  time: 0.2374  data: 0.0735  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4450 (1.2642)  acc1: 72.4000 (77.8342)  acc5: 92.0000 (93.8537)  time: 0.2244  data: 0.0605  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4392 (1.2758)  acc1: 70.4000 (77.1920)  acc5: 92.0000 (93.7840)  time: 0.1857  data: 0.0219  max mem: 27255
Test: Total time: 0:00:13 (0.2664 s / it)
* Acc@1 76.696 Acc@5 93.728 loss 1.284
Accuracy of the model on the 50000 test images: 76.7%
Max accuracy: 77.02%
Epoch: [127]  [   0/2502]  eta: 1:17:02  lr: 0.002724  min_lr: 0.002724  loss: 2.4925 (2.4925)  weight_decay: 0.0500 (0.0500)  time: 1.8474  data: 1.3757  max mem: 27255
Epoch: [127]  [ 200/2502]  eta: 0:13:09  lr: 0.002722  min_lr: 0.002722  loss: 3.3819 (3.4231)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6689 (0.7042)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [127]  [ 400/2502]  eta: 0:11:52  lr: 0.002721  min_lr: 0.002721  loss: 3.7312 (3.4531)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6936 (0.7077)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [127]  [ 600/2502]  eta: 0:10:41  lr: 0.002719  min_lr: 0.002719  loss: 3.6311 (3.4405)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6621 (0.6965)  time: 0.3328  data: 0.0005  max mem: 27255
Epoch: [127]  [ 800/2502]  eta: 0:09:32  lr: 0.002717  min_lr: 0.002717  loss: 3.7898 (3.4439)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7098 (0.6961)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [127]  [1000/2502]  eta: 0:08:24  lr: 0.002716  min_lr: 0.002716  loss: 3.5548 (3.4446)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7189 (0.7004)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [127]  [1200/2502]  eta: 0:07:17  lr: 0.002714  min_lr: 0.002714  loss: 3.6523 (3.4573)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6495 (0.7005)  time: 0.3345  data: 0.0003  max mem: 27255
Epoch: [127]  [1400/2502]  eta: 0:06:09  lr: 0.002712  min_lr: 0.002712  loss: 3.7241 (3.4627)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7110 (0.6995)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [127]  [1600/2502]  eta: 0:05:02  lr: 0.002711  min_lr: 0.002711  loss: 3.6009 (3.4683)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6740 (0.6948)  time: 0.3375  data: 0.0004  max mem: 27255
Epoch: [127]  [1800/2502]  eta: 0:03:55  lr: 0.002709  min_lr: 0.002709  loss: 3.3982 (3.4658)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6378 (0.6919)  time: 0.3371  data: 0.0006  max mem: 27255
Epoch: [127]  [2000/2502]  eta: 0:02:48  lr: 0.002707  min_lr: 0.002707  loss: 3.3081 (3.4643)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6113 (0.6937)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [127]  [2200/2502]  eta: 0:01:41  lr: 0.002706  min_lr: 0.002706  loss: 3.6788 (3.4682)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6998 (0.6968)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [127]  [2400/2502]  eta: 0:00:34  lr: 0.002704  min_lr: 0.002704  loss: 3.4002 (3.4701)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6490 (0.6942)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [127]  [2501/2502]  eta: 0:00:00  lr: 0.002703  min_lr: 0.002703  loss: 3.3941 (3.4757)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7469 (0.6968)  time: 0.3007  data: 0.0009  max mem: 27255
Epoch: [127] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.002703  min_lr: 0.002703  loss: 3.3941 (3.4873)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7469 (0.6968)
Test:  [ 0/50]  eta: 0:02:18  loss: 0.5078 (0.5078)  acc1: 90.4000 (90.4000)  acc5: 99.2000 (99.2000)  time: 2.7690  data: 2.5784  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.8780 (0.9090)  acc1: 83.2000 (82.5818)  acc5: 96.8000 (96.1818)  time: 0.4792  data: 0.3128  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8911 (0.9230)  acc1: 81.6000 (81.6571)  acc5: 96.8000 (96.4952)  time: 0.2246  data: 0.0606  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1177 (1.0461)  acc1: 77.2000 (79.2258)  acc5: 94.0000 (94.7742)  time: 0.2242  data: 0.0598  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3342 (1.1222)  acc1: 70.8000 (77.2390)  acc5: 91.2000 (93.9805)  time: 0.2112  data: 0.0424  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3332 (1.1373)  acc1: 70.4000 (76.6720)  acc5: 91.2000 (93.8320)  time: 0.1926  data: 0.0231  max mem: 27255
Test: Total time: 0:00:13 (0.2613 s / it)
* Acc@1 76.702 Acc@5 93.846 loss 1.133
Accuracy of the model on the 50000 test images: 76.7%
Max accuracy: 77.02%
Epoch: [128]  [   0/2502]  eta: 1:30:28  lr: 0.002703  min_lr: 0.002703  loss: 4.0383 (4.0383)  weight_decay: 0.0500 (0.0500)  time: 2.1698  data: 1.5441  max mem: 27255
Epoch: [128]  [ 200/2502]  eta: 0:13:11  lr: 0.002701  min_lr: 0.002701  loss: 3.6334 (3.5174)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6942 (0.7048)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [128]  [ 400/2502]  eta: 0:11:52  lr: 0.002700  min_lr: 0.002700  loss: 3.6722 (3.5012)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6816 (0.6869)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [128]  [ 600/2502]  eta: 0:10:42  lr: 0.002698  min_lr: 0.002698  loss: 3.6806 (3.4995)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6975 (0.6992)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [128]  [ 800/2502]  eta: 0:09:32  lr: 0.002696  min_lr: 0.002696  loss: 3.4325 (3.5004)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7087 (0.7068)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [128]  [1000/2502]  eta: 0:08:24  lr: 0.002695  min_lr: 0.002695  loss: 3.7096 (3.4948)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7075 (inf)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [128]  [1200/2502]  eta: 0:07:17  lr: 0.002693  min_lr: 0.002693  loss: 3.6141 (3.4910)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6675 (inf)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [128]  [1400/2502]  eta: 0:06:09  lr: 0.002691  min_lr: 0.002691  loss: 3.3148 (3.4966)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6659 (inf)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [128]  [1600/2502]  eta: 0:05:02  lr: 0.002690  min_lr: 0.002690  loss: 3.3542 (3.5002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6953 (inf)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [128]  [1800/2502]  eta: 0:03:55  lr: 0.002688  min_lr: 0.002688  loss: 3.4949 (3.4971)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7422 (inf)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [128]  [2000/2502]  eta: 0:02:48  lr: 0.002686  min_lr: 0.002686  loss: 3.4929 (3.4948)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6522 (inf)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [128]  [2200/2502]  eta: 0:01:41  lr: 0.002685  min_lr: 0.002685  loss: 3.7438 (3.4983)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7007 (inf)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [128]  [2400/2502]  eta: 0:00:34  lr: 0.002683  min_lr: 0.002683  loss: 3.8209 (3.5008)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7168 (inf)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [128]  [2501/2502]  eta: 0:00:00  lr: 0.002682  min_lr: 0.002682  loss: 3.5394 (3.5016)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7257 (inf)  time: 0.3000  data: 0.0007  max mem: 27255
Epoch: [128] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.002682  min_lr: 0.002682  loss: 3.5394 (3.4892)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7257 (inf)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.6102 (0.6102)  acc1: 92.0000 (92.0000)  acc5: 98.8000 (98.8000)  time: 2.6592  data: 2.4574  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9190 (0.9516)  acc1: 84.0000 (83.0909)  acc5: 97.6000 (96.7273)  time: 0.4064  data: 0.2390  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9038 (0.9567)  acc1: 81.2000 (82.2667)  acc5: 96.8000 (96.6286)  time: 0.2217  data: 0.0575  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1274 (1.0724)  acc1: 77.2000 (80.1161)  acc5: 94.8000 (95.1742)  time: 0.2419  data: 0.0766  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3712 (1.1522)  acc1: 74.4000 (77.9707)  acc5: 91.6000 (94.3220)  time: 0.2022  data: 0.0373  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3574 (1.1748)  acc1: 72.4000 (77.3840)  acc5: 91.6000 (94.0160)  time: 0.1745  data: 0.0109  max mem: 27255
Test: Total time: 0:00:12 (0.2547 s / it)
* Acc@1 76.856 Acc@5 93.934 loss 1.185
Accuracy of the model on the 50000 test images: 76.9%
Max accuracy: 77.02%
Epoch: [129]  [   0/2502]  eta: 1:23:35  lr: 0.002682  min_lr: 0.002682  loss: 4.1482 (4.1482)  weight_decay: 0.0500 (0.0500)  time: 2.0048  data: 1.6484  max mem: 27255
Epoch: [129]  [ 200/2502]  eta: 0:13:12  lr: 0.002680  min_lr: 0.002680  loss: 3.7782 (3.4944)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7036 (0.6865)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [129]  [ 400/2502]  eta: 0:11:53  lr: 0.002679  min_lr: 0.002679  loss: 3.4926 (3.4839)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6930 (0.7054)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [129]  [ 600/2502]  eta: 0:10:42  lr: 0.002677  min_lr: 0.002677  loss: 3.6799 (3.4920)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7305 (0.7091)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [129]  [ 800/2502]  eta: 0:09:33  lr: 0.002675  min_lr: 0.002675  loss: 3.5920 (3.4775)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7350 (0.7058)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [129]  [1000/2502]  eta: 0:08:24  lr: 0.002674  min_lr: 0.002674  loss: 3.6336 (3.4735)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6894 (0.7024)  time: 0.3348  data: 0.0003  max mem: 27255
Epoch: [129]  [1200/2502]  eta: 0:07:17  lr: 0.002672  min_lr: 0.002672  loss: 3.6897 (3.4786)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6667 (0.7024)  time: 0.3330  data: 0.0003  max mem: 27255
Epoch: [129]  [1400/2502]  eta: 0:06:09  lr: 0.002670  min_lr: 0.002670  loss: 3.4746 (3.4779)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6625 (0.7011)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [129]  [1600/2502]  eta: 0:05:02  lr: 0.002669  min_lr: 0.002669  loss: 3.7055 (3.4781)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6398 (0.7029)  time: 0.3335  data: 0.0003  max mem: 27255
Epoch: [129]  [1800/2502]  eta: 0:03:55  lr: 0.002667  min_lr: 0.002667  loss: 3.3842 (3.4787)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7066 (0.7043)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [129]  [2000/2502]  eta: 0:02:48  lr: 0.002665  min_lr: 0.002665  loss: 3.3552 (3.4763)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7008 (0.7061)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [129]  [2200/2502]  eta: 0:01:41  lr: 0.002663  min_lr: 0.002663  loss: 3.8348 (3.4731)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6473 (0.7061)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [129]  [2400/2502]  eta: 0:00:34  lr: 0.002662  min_lr: 0.002662  loss: 3.6281 (3.4698)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6963 (0.7062)  time: 0.3332  data: 0.0003  max mem: 27255
Epoch: [129]  [2501/2502]  eta: 0:00:00  lr: 0.002661  min_lr: 0.002661  loss: 3.6896 (3.4722)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6813 (0.7063)  time: 0.3015  data: 0.0007  max mem: 27255
Epoch: [129] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.002661  min_lr: 0.002661  loss: 3.6896 (3.4892)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6813 (0.7063)
Test:  [ 0/50]  eta: 0:02:07  loss: 0.7261 (0.7261)  acc1: 91.6000 (91.6000)  acc5: 98.8000 (98.8000)  time: 2.5585  data: 2.3661  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9682 (1.0051)  acc1: 82.8000 (82.3636)  acc5: 96.4000 (96.2909)  time: 0.4201  data: 0.2535  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0291 (1.0261)  acc1: 80.4000 (81.3524)  acc5: 96.4000 (96.4762)  time: 0.2178  data: 0.0519  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2024 (1.1329)  acc1: 76.0000 (79.1097)  acc5: 93.2000 (94.8903)  time: 0.2495  data: 0.0835  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4063 (1.1961)  acc1: 73.2000 (77.5902)  acc5: 90.8000 (94.1561)  time: 0.2284  data: 0.0635  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4063 (1.2142)  acc1: 72.4000 (77.0000)  acc5: 92.0000 (94.0320)  time: 0.1786  data: 0.0109  max mem: 27255
Test: Total time: 0:00:13 (0.2623 s / it)
* Acc@1 76.992 Acc@5 93.994 loss 1.218
Accuracy of the model on the 50000 test images: 77.0%
Max accuracy: 77.02%
Epoch: [130]  [   0/2502]  eta: 1:29:37  lr: 0.002661  min_lr: 0.002661  loss: 3.6404 (3.6404)  weight_decay: 0.0500 (0.0500)  time: 2.1494  data: 1.3775  max mem: 27255
Epoch: [130]  [ 200/2502]  eta: 0:13:09  lr: 0.002659  min_lr: 0.002659  loss: 3.7795 (3.4786)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6635 (0.7067)  time: 0.3328  data: 0.0003  max mem: 27255
Epoch: [130]  [ 400/2502]  eta: 0:11:51  lr: 0.002658  min_lr: 0.002658  loss: 3.7432 (3.4962)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6642 (0.7023)  time: 0.3331  data: 0.0003  max mem: 27255
Epoch: [130]  [ 600/2502]  eta: 0:10:40  lr: 0.002656  min_lr: 0.002656  loss: 3.6842 (3.4765)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6813 (0.7066)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [130]  [ 800/2502]  eta: 0:09:32  lr: 0.002654  min_lr: 0.002654  loss: 3.5895 (3.4780)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6047 (0.7065)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [130]  [1000/2502]  eta: 0:08:24  lr: 0.002652  min_lr: 0.002652  loss: 3.6824 (3.4912)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7323 (0.7032)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [130]  [1200/2502]  eta: 0:07:17  lr: 0.002651  min_lr: 0.002651  loss: 3.7888 (3.4896)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6920 (0.7078)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [130]  [1400/2502]  eta: 0:06:10  lr: 0.002649  min_lr: 0.002649  loss: 3.5593 (3.4831)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6755 (0.7055)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [130]  [1600/2502]  eta: 0:05:02  lr: 0.002647  min_lr: 0.002647  loss: 3.2425 (3.4825)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6429 (0.7047)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [130]  [1800/2502]  eta: 0:03:55  lr: 0.002646  min_lr: 0.002646  loss: 3.4978 (3.4848)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7016 (0.7054)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [130]  [2000/2502]  eta: 0:02:48  lr: 0.002644  min_lr: 0.002644  loss: 3.7452 (3.4833)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6354 (0.7070)  time: 0.3342  data: 0.0003  max mem: 27255
Epoch: [130]  [2200/2502]  eta: 0:01:41  lr: 0.002642  min_lr: 0.002642  loss: 3.6953 (3.4844)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6712 (0.7059)  time: 0.3327  data: 0.0003  max mem: 27255
Epoch: [130]  [2400/2502]  eta: 0:00:34  lr: 0.002641  min_lr: 0.002641  loss: 3.5984 (3.4875)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6987 (0.7048)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [130]  [2501/2502]  eta: 0:00:00  lr: 0.002640  min_lr: 0.002640  loss: 3.7166 (3.4916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6984 (0.7059)  time: 0.3009  data: 0.0008  max mem: 27255
Epoch: [130] Total time: 0:13:58 (0.3349 s / it)
Averaged stats: lr: 0.002640  min_lr: 0.002640  loss: 3.7166 (3.4939)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6984 (0.7059)
Test:  [ 0/50]  eta: 0:02:18  loss: 0.6747 (0.6747)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 2.7662  data: 2.5715  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9904 (1.0018)  acc1: 82.8000 (82.8000)  acc5: 96.8000 (96.3273)  time: 0.4049  data: 0.2380  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9904 (1.0071)  acc1: 80.8000 (81.8667)  acc5: 96.4000 (96.4191)  time: 0.2239  data: 0.0600  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2229 (1.1242)  acc1: 78.0000 (79.7548)  acc5: 93.6000 (94.8387)  time: 0.2670  data: 0.1032  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3836 (1.1777)  acc1: 73.6000 (77.9902)  acc5: 91.6000 (94.2634)  time: 0.2295  data: 0.0658  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3575 (1.2003)  acc1: 71.6000 (77.2480)  acc5: 92.8000 (94.1440)  time: 0.1839  data: 0.0204  max mem: 27255
Test: Total time: 0:00:13 (0.2680 s / it)
* Acc@1 77.130 Acc@5 94.006 loss 1.207
Accuracy of the model on the 50000 test images: 77.1%
Max accuracy: 77.13%
Epoch: [131]  [   0/2502]  eta: 1:14:27  lr: 0.002640  min_lr: 0.002640  loss: 2.6611 (2.6611)  weight_decay: 0.0500 (0.0500)  time: 1.7856  data: 1.4478  max mem: 27255
Epoch: [131]  [ 200/2502]  eta: 0:13:05  lr: 0.002638  min_lr: 0.002638  loss: 3.6380 (3.4693)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6720 (0.7051)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [131]  [ 400/2502]  eta: 0:11:50  lr: 0.002636  min_lr: 0.002636  loss: 3.4941 (3.4553)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7312 (0.7208)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [131]  [ 600/2502]  eta: 0:10:40  lr: 0.002635  min_lr: 0.002635  loss: 3.4735 (3.4496)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7407 (0.7257)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [131]  [ 800/2502]  eta: 0:09:32  lr: 0.002633  min_lr: 0.002633  loss: 3.6680 (3.4693)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6958 (0.7239)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [131]  [1000/2502]  eta: 0:08:24  lr: 0.002631  min_lr: 0.002631  loss: 3.3871 (3.4692)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6841 (0.7225)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [131]  [1200/2502]  eta: 0:07:17  lr: 0.002629  min_lr: 0.002629  loss: 3.7092 (3.4834)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6432 (0.7209)  time: 0.3353  data: 0.0005  max mem: 27255
Epoch: [131]  [1400/2502]  eta: 0:06:10  lr: 0.002628  min_lr: 0.002628  loss: 3.8128 (3.4878)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7124 (0.7194)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [131]  [1600/2502]  eta: 0:05:02  lr: 0.002626  min_lr: 0.002626  loss: 3.6154 (3.4858)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6958 (0.7156)  time: 0.3364  data: 0.0006  max mem: 27255
Epoch: [131]  [1800/2502]  eta: 0:03:55  lr: 0.002624  min_lr: 0.002624  loss: 3.8051 (3.4801)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7103 (0.7156)  time: 0.3357  data: 0.0005  max mem: 27255
Epoch: [131]  [2000/2502]  eta: 0:02:48  lr: 0.002623  min_lr: 0.002623  loss: 3.6541 (3.4815)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6354 (0.7130)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [131]  [2200/2502]  eta: 0:01:41  lr: 0.002621  min_lr: 0.002621  loss: 3.8327 (3.4871)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6958 (0.7158)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [131]  [2400/2502]  eta: 0:00:34  lr: 0.002619  min_lr: 0.002619  loss: 3.5822 (3.4822)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6765 (0.7155)  time: 0.3442  data: 0.0005  max mem: 27255
Epoch: [131]  [2501/2502]  eta: 0:00:00  lr: 0.002618  min_lr: 0.002618  loss: 3.4684 (3.4834)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7157 (0.7163)  time: 0.3006  data: 0.0009  max mem: 27255
Epoch: [131] Total time: 0:13:59 (0.3357 s / it)
Averaged stats: lr: 0.002618  min_lr: 0.002618  loss: 3.4684 (3.4820)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7157 (0.7163)
Test:  [ 0/50]  eta: 0:03:07  loss: 0.6932 (0.6932)  acc1: 92.0000 (92.0000)  acc5: 99.2000 (99.2000)  time: 3.7470  data: 3.5555  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 0.9241 (0.9677)  acc1: 82.4000 (82.0727)  acc5: 96.4000 (96.3636)  time: 0.5040  data: 0.3373  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.9759 (0.9847)  acc1: 79.2000 (81.6381)  acc5: 96.4000 (96.5143)  time: 0.2141  data: 0.0493  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1296 (1.0927)  acc1: 77.6000 (79.5097)  acc5: 94.4000 (95.0323)  time: 0.2443  data: 0.0798  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 1.3773 (1.1609)  acc1: 72.8000 (77.7659)  acc5: 92.0000 (94.1463)  time: 0.2140  data: 0.0503  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3773 (1.1774)  acc1: 73.2000 (77.2880)  acc5: 92.0000 (94.0320)  time: 0.2154  data: 0.0501  max mem: 27255
Test: Total time: 0:00:13 (0.2781 s / it)
* Acc@1 77.024 Acc@5 94.038 loss 1.183
Accuracy of the model on the 50000 test images: 77.0%
Max accuracy: 77.13%
Epoch: [132]  [   0/2502]  eta: 1:31:41  lr: 0.002618  min_lr: 0.002618  loss: 3.9460 (3.9460)  weight_decay: 0.0500 (0.0500)  time: 2.1990  data: 1.8478  max mem: 27255
Epoch: [132]  [ 200/2502]  eta: 0:13:12  lr: 0.002617  min_lr: 0.002617  loss: 3.7207 (3.4408)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6559 (0.7427)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [132]  [ 400/2502]  eta: 0:11:52  lr: 0.002615  min_lr: 0.002615  loss: 3.3862 (3.4575)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7551 (0.7195)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [132]  [ 600/2502]  eta: 0:10:41  lr: 0.002613  min_lr: 0.002613  loss: 3.4418 (3.4627)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6517 (0.7156)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [132]  [ 800/2502]  eta: 0:09:32  lr: 0.002612  min_lr: 0.002612  loss: 3.3074 (3.4542)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7318 (0.7155)  time: 0.3410  data: 0.0004  max mem: 27255
Epoch: [132]  [1000/2502]  eta: 0:08:24  lr: 0.002610  min_lr: 0.002610  loss: 3.3840 (3.4569)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6744 (0.7230)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [132]  [1200/2502]  eta: 0:07:17  lr: 0.002608  min_lr: 0.002608  loss: 3.7599 (3.4591)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6796 (0.7237)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [132]  [1400/2502]  eta: 0:06:09  lr: 0.002606  min_lr: 0.002606  loss: 3.4823 (3.4612)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6607 (0.7186)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [132]  [1600/2502]  eta: 0:05:02  lr: 0.002605  min_lr: 0.002605  loss: 3.2756 (3.4678)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7290 (0.7246)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [132]  [1800/2502]  eta: 0:03:55  lr: 0.002603  min_lr: 0.002603  loss: 3.6849 (3.4683)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6750 (0.7221)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [132]  [2000/2502]  eta: 0:02:48  lr: 0.002601  min_lr: 0.002601  loss: 3.2794 (3.4702)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6530 (0.7213)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [132]  [2200/2502]  eta: 0:01:41  lr: 0.002600  min_lr: 0.002600  loss: 3.5044 (3.4687)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7825 (0.7241)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [132]  [2400/2502]  eta: 0:00:34  lr: 0.002598  min_lr: 0.002598  loss: 3.4386 (3.4657)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7038 (0.7224)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [132]  [2501/2502]  eta: 0:00:00  lr: 0.002597  min_lr: 0.002597  loss: 3.5499 (3.4673)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6566 (0.7207)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [132] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.002597  min_lr: 0.002597  loss: 3.5499 (3.4722)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6566 (0.7207)
Test:  [ 0/50]  eta: 0:02:51  loss: 0.6604 (0.6604)  acc1: 92.8000 (92.8000)  acc5: 99.2000 (99.2000)  time: 3.4364  data: 3.2357  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 1.0823 (1.0544)  acc1: 84.4000 (83.5273)  acc5: 96.0000 (96.1091)  time: 0.4755  data: 0.3078  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0823 (1.0808)  acc1: 80.0000 (81.5238)  acc5: 96.0000 (96.2095)  time: 0.1806  data: 0.0164  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2389 (1.2010)  acc1: 75.6000 (79.3290)  acc5: 94.4000 (94.7871)  time: 0.1790  data: 0.0149  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4781 (1.2732)  acc1: 72.0000 (77.3659)  acc5: 91.2000 (93.9805)  time: 0.1919  data: 0.0280  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4665 (1.2869)  acc1: 72.8000 (76.8720)  acc5: 92.4000 (93.8960)  time: 0.1889  data: 0.0251  max mem: 27255
Test: Total time: 0:00:12 (0.2501 s / it)
* Acc@1 76.606 Acc@5 93.930 loss 1.294
Accuracy of the model on the 50000 test images: 76.6%
Max accuracy: 77.13%
Epoch: [133]  [   0/2502]  eta: 1:37:08  lr: 0.002597  min_lr: 0.002597  loss: 2.9467 (2.9467)  weight_decay: 0.0500 (0.0500)  time: 2.3295  data: 1.7274  max mem: 27255
Epoch: [133]  [ 200/2502]  eta: 0:13:11  lr: 0.002595  min_lr: 0.002595  loss: 3.5344 (3.4512)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6638 (0.7355)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [133]  [ 400/2502]  eta: 0:11:52  lr: 0.002594  min_lr: 0.002594  loss: 3.4994 (3.4561)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6415 (0.7032)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [133]  [ 600/2502]  eta: 0:10:40  lr: 0.002592  min_lr: 0.002592  loss: 3.6358 (3.4745)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6563 (0.7044)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [133]  [ 800/2502]  eta: 0:09:32  lr: 0.002590  min_lr: 0.002590  loss: 3.4437 (3.4584)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6742 (0.7029)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [133]  [1000/2502]  eta: 0:08:24  lr: 0.002588  min_lr: 0.002588  loss: 3.2444 (3.4421)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7007 (0.7031)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [133]  [1200/2502]  eta: 0:07:17  lr: 0.002587  min_lr: 0.002587  loss: 3.7433 (3.4532)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6180 (0.7007)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [133]  [1400/2502]  eta: 0:06:09  lr: 0.002585  min_lr: 0.002585  loss: 3.6015 (3.4587)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7255 (0.7047)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [133]  [1600/2502]  eta: 0:05:02  lr: 0.002583  min_lr: 0.002583  loss: 3.3820 (3.4654)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7228 (0.7076)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [133]  [1800/2502]  eta: 0:03:55  lr: 0.002582  min_lr: 0.002582  loss: 3.1440 (3.4723)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7219 (inf)  time: 0.3360  data: 0.0003  max mem: 27255
Epoch: [133]  [2000/2502]  eta: 0:02:48  lr: 0.002580  min_lr: 0.002580  loss: 3.3825 (3.4680)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7400 (inf)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [133]  [2200/2502]  eta: 0:01:41  lr: 0.002578  min_lr: 0.002578  loss: 3.6883 (3.4773)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6943 (inf)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [133]  [2400/2502]  eta: 0:00:34  lr: 0.002576  min_lr: 0.002576  loss: 3.3621 (3.4766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6890 (inf)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [133]  [2501/2502]  eta: 0:00:00  lr: 0.002576  min_lr: 0.002576  loss: 3.5584 (3.4753)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7020 (inf)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [133] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.002576  min_lr: 0.002576  loss: 3.5584 (3.4742)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7020 (inf)
Test:  [ 0/50]  eta: 0:02:14  loss: 0.5820 (0.5820)  acc1: 92.0000 (92.0000)  acc5: 99.2000 (99.2000)  time: 2.6900  data: 2.5009  max mem: 27255
Test:  [10/50]  eta: 0:00:15  loss: 0.8925 (0.9315)  acc1: 82.0000 (82.4364)  acc5: 97.2000 (96.6182)  time: 0.3989  data: 0.2326  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9447 (0.9597)  acc1: 79.6000 (81.6571)  acc5: 96.4000 (96.4571)  time: 0.2323  data: 0.0683  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1586 (1.0847)  acc1: 78.4000 (79.4194)  acc5: 94.8000 (94.8903)  time: 0.2757  data: 0.1104  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3897 (1.1615)  acc1: 73.2000 (77.5707)  acc5: 91.6000 (93.9902)  time: 0.2450  data: 0.0785  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3647 (1.1771)  acc1: 73.2000 (77.0560)  acc5: 91.6000 (93.8960)  time: 0.1990  data: 0.0336  max mem: 27255
Test: Total time: 0:00:13 (0.2760 s / it)
* Acc@1 77.282 Acc@5 94.084 loss 1.174
Accuracy of the model on the 50000 test images: 77.3%
Max accuracy: 77.28%
Epoch: [134]  [   0/2502]  eta: 1:18:56  lr: 0.002576  min_lr: 0.002576  loss: 3.5938 (3.5938)  weight_decay: 0.0500 (0.0500)  time: 1.8930  data: 1.5356  max mem: 27255
Epoch: [134]  [ 200/2502]  eta: 0:13:07  lr: 0.002574  min_lr: 0.002574  loss: 3.7000 (3.4950)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7362 (0.7404)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [134]  [ 400/2502]  eta: 0:11:50  lr: 0.002572  min_lr: 0.002572  loss: 3.4168 (3.4924)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7000 (0.7383)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [134]  [ 600/2502]  eta: 0:10:41  lr: 0.002570  min_lr: 0.002570  loss: 3.6968 (3.4772)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7053 (0.7331)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [134]  [ 800/2502]  eta: 0:09:32  lr: 0.002569  min_lr: 0.002569  loss: 3.4480 (3.4731)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6835 (0.7327)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [134]  [1000/2502]  eta: 0:08:24  lr: 0.002567  min_lr: 0.002567  loss: 3.7799 (3.4841)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7821 (0.7317)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [134]  [1200/2502]  eta: 0:07:16  lr: 0.002565  min_lr: 0.002565  loss: 2.8040 (3.4753)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7316 (0.7282)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [134]  [1400/2502]  eta: 0:06:09  lr: 0.002564  min_lr: 0.002564  loss: 3.8314 (3.4784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7621 (0.7339)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [134]  [1600/2502]  eta: 0:05:02  lr: 0.002562  min_lr: 0.002562  loss: 3.4428 (3.4743)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7406 (0.7280)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [134]  [1800/2502]  eta: 0:03:55  lr: 0.002560  min_lr: 0.002560  loss: 3.2144 (3.4740)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6910 (0.7282)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [134]  [2000/2502]  eta: 0:02:48  lr: 0.002558  min_lr: 0.002558  loss: 3.6213 (3.4686)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6759 (0.7262)  time: 0.3435  data: 0.0006  max mem: 27255
Epoch: [134]  [2200/2502]  eta: 0:01:41  lr: 0.002557  min_lr: 0.002557  loss: 3.5520 (3.4711)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6951 (0.7263)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [134]  [2400/2502]  eta: 0:00:34  lr: 0.002555  min_lr: 0.002555  loss: 3.5046 (3.4737)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7253 (0.7257)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [134]  [2501/2502]  eta: 0:00:00  lr: 0.002554  min_lr: 0.002554  loss: 3.2345 (3.4747)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6945 (0.7237)  time: 0.3008  data: 0.0007  max mem: 27255
Epoch: [134] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.002554  min_lr: 0.002554  loss: 3.2345 (3.4728)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6945 (0.7237)
Test:  [ 0/50]  eta: 0:03:04  loss: 0.6809 (0.6809)  acc1: 90.8000 (90.8000)  acc5: 98.4000 (98.4000)  time: 3.6861  data: 3.4982  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.9106 (0.9447)  acc1: 84.4000 (83.3091)  acc5: 97.2000 (96.4727)  time: 0.4883  data: 0.3222  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9433 (0.9649)  acc1: 81.2000 (81.9810)  acc5: 96.8000 (96.5333)  time: 0.1704  data: 0.0065  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0431 (1.0751)  acc1: 78.0000 (79.7032)  acc5: 94.0000 (94.9936)  time: 0.2129  data: 0.0490  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3450 (1.1421)  acc1: 74.4000 (77.7756)  acc5: 90.8000 (94.1463)  time: 0.2238  data: 0.0586  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3519 (1.1643)  acc1: 70.4000 (76.9680)  acc5: 91.2000 (94.0320)  time: 0.1810  data: 0.0139  max mem: 27255
Test: Total time: 0:00:13 (0.2635 s / it)
* Acc@1 77.070 Acc@5 94.006 loss 1.169
Accuracy of the model on the 50000 test images: 77.1%
Max accuracy: 77.28%
Epoch: [135]  [   0/2502]  eta: 1:14:38  lr: 0.002554  min_lr: 0.002554  loss: 2.4279 (2.4279)  weight_decay: 0.0500 (0.0500)  time: 1.7901  data: 1.4180  max mem: 27255
Epoch: [135]  [ 200/2502]  eta: 0:13:07  lr: 0.002552  min_lr: 0.002552  loss: 3.5657 (3.4269)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6930 (0.7450)  time: 0.3332  data: 0.0003  max mem: 27255
Epoch: [135]  [ 400/2502]  eta: 0:11:51  lr: 0.002551  min_lr: 0.002551  loss: 3.3873 (3.4473)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7825 (0.7758)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [135]  [ 600/2502]  eta: 0:10:41  lr: 0.002549  min_lr: 0.002549  loss: 3.3464 (3.4560)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7123 (0.7547)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [135]  [ 800/2502]  eta: 0:09:32  lr: 0.002547  min_lr: 0.002547  loss: 3.5857 (3.4515)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6913 (0.7431)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [135]  [1000/2502]  eta: 0:08:24  lr: 0.002545  min_lr: 0.002545  loss: 3.6385 (3.4606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6647 (0.7335)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [135]  [1200/2502]  eta: 0:07:16  lr: 0.002544  min_lr: 0.002544  loss: 3.6418 (3.4691)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7485 (0.7358)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [135]  [1400/2502]  eta: 0:06:09  lr: 0.002542  min_lr: 0.002542  loss: 3.5552 (3.4746)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7300 (0.7352)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [135]  [1600/2502]  eta: 0:05:02  lr: 0.002540  min_lr: 0.002540  loss: 3.6096 (3.4722)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7176 (0.7320)  time: 0.3331  data: 0.0003  max mem: 27255
Epoch: [135]  [1800/2502]  eta: 0:03:55  lr: 0.002538  min_lr: 0.002538  loss: 3.3284 (3.4717)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6425 (0.7282)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [135]  [2000/2502]  eta: 0:02:48  lr: 0.002537  min_lr: 0.002537  loss: 3.3497 (3.4712)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7944 (0.7327)  time: 0.3344  data: 0.0003  max mem: 27255
Epoch: [135]  [2200/2502]  eta: 0:01:41  lr: 0.002535  min_lr: 0.002535  loss: 3.7595 (3.4744)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6723 (0.7313)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [135]  [2400/2502]  eta: 0:00:34  lr: 0.002533  min_lr: 0.002533  loss: 3.3454 (3.4720)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7465 (0.7328)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [135]  [2501/2502]  eta: 0:00:00  lr: 0.002532  min_lr: 0.002532  loss: 3.3723 (3.4701)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7227 (0.7321)  time: 0.2996  data: 0.0007  max mem: 27255
Epoch: [135] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.002532  min_lr: 0.002532  loss: 3.3723 (3.4719)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7227 (0.7321)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.6169 (0.6169)  acc1: 91.6000 (91.6000)  acc5: 99.2000 (99.2000)  time: 2.6517  data: 2.4679  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9466 (0.9791)  acc1: 84.0000 (82.8000)  acc5: 97.2000 (96.4727)  time: 0.4371  data: 0.2713  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9582 (0.9863)  acc1: 80.8000 (82.1524)  acc5: 96.8000 (96.5524)  time: 0.2360  data: 0.0720  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1187 (1.0877)  acc1: 77.2000 (79.7548)  acc5: 93.6000 (95.0065)  time: 0.2462  data: 0.0822  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3825 (1.1538)  acc1: 73.6000 (78.0293)  acc5: 91.6000 (94.2634)  time: 0.2186  data: 0.0546  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3499 (1.1712)  acc1: 72.8000 (77.4800)  acc5: 92.0000 (94.1280)  time: 0.1826  data: 0.0186  max mem: 27255
Test: Total time: 0:00:13 (0.2663 s / it)
* Acc@1 77.342 Acc@5 94.020 loss 1.183
Accuracy of the model on the 50000 test images: 77.3%
Max accuracy: 77.34%
Epoch: [136]  [   0/2502]  eta: 1:30:10  lr: 0.002532  min_lr: 0.002532  loss: 4.4227 (4.4227)  weight_decay: 0.0500 (0.0500)  time: 2.1624  data: 1.8182  max mem: 27255
Epoch: [136]  [ 200/2502]  eta: 0:13:10  lr: 0.002531  min_lr: 0.002531  loss: 3.3419 (3.4048)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6733 (0.7318)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [136]  [ 400/2502]  eta: 0:11:52  lr: 0.002529  min_lr: 0.002529  loss: 3.7833 (3.4413)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7318 (0.7384)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [136]  [ 600/2502]  eta: 0:10:42  lr: 0.002527  min_lr: 0.002527  loss: 3.5784 (3.4482)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6714 (0.7136)  time: 0.3365  data: 0.0004  max mem: 27255
Epoch: [136]  [ 800/2502]  eta: 0:09:34  lr: 0.002526  min_lr: 0.002526  loss: 3.4985 (3.4602)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7158 (0.7126)  time: 0.3493  data: 0.0004  max mem: 27255
Epoch: [136]  [1000/2502]  eta: 0:08:26  lr: 0.002524  min_lr: 0.002524  loss: 3.5473 (3.4543)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6924 (0.7158)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [136]  [1200/2502]  eta: 0:07:18  lr: 0.002522  min_lr: 0.002522  loss: 3.5490 (3.4540)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6889 (0.7128)  time: 0.3368  data: 0.0005  max mem: 27255
Epoch: [136]  [1400/2502]  eta: 0:06:10  lr: 0.002520  min_lr: 0.002520  loss: 3.4860 (3.4568)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7609 (0.7160)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [136]  [1600/2502]  eta: 0:05:03  lr: 0.002519  min_lr: 0.002519  loss: 3.6531 (3.4491)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7019 (0.7202)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [136]  [1800/2502]  eta: 0:03:55  lr: 0.002517  min_lr: 0.002517  loss: 3.8881 (3.4501)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6999 (0.7190)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [136]  [2000/2502]  eta: 0:02:48  lr: 0.002515  min_lr: 0.002515  loss: 3.6586 (3.4490)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7122 (0.7183)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [136]  [2200/2502]  eta: 0:01:41  lr: 0.002513  min_lr: 0.002513  loss: 3.4258 (3.4473)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7022 (0.7218)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [136]  [2400/2502]  eta: 0:00:34  lr: 0.002512  min_lr: 0.002512  loss: 3.7444 (3.4466)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7287 (0.7231)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [136]  [2501/2502]  eta: 0:00:00  lr: 0.002511  min_lr: 0.002511  loss: 3.0371 (3.4465)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6986 (0.7224)  time: 0.3000  data: 0.0008  max mem: 27255
Epoch: [136] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.002511  min_lr: 0.002511  loss: 3.0371 (3.4626)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6986 (0.7224)
Test:  [ 0/50]  eta: 0:02:41  loss: 0.6253 (0.6253)  acc1: 91.2000 (91.2000)  acc5: 98.4000 (98.4000)  time: 3.2371  data: 3.0322  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.9532 (0.9463)  acc1: 82.4000 (83.2000)  acc5: 96.4000 (96.2909)  time: 0.4977  data: 0.3298  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.9532 (0.9573)  acc1: 82.0000 (82.1333)  acc5: 96.4000 (96.5333)  time: 0.2270  data: 0.0631  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1499 (1.0877)  acc1: 76.0000 (79.6645)  acc5: 94.0000 (94.8129)  time: 0.2439  data: 0.0801  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4043 (1.1592)  acc1: 72.8000 (77.8439)  acc5: 91.2000 (94.1463)  time: 0.2197  data: 0.0561  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3711 (1.1767)  acc1: 72.8000 (77.3200)  acc5: 92.0000 (94.0800)  time: 0.2193  data: 0.0559  max mem: 27255
Test: Total time: 0:00:13 (0.2749 s / it)
* Acc@1 77.116 Acc@5 94.032 loss 1.183
Accuracy of the model on the 50000 test images: 77.1%
Max accuracy: 77.34%
Epoch: [137]  [   0/2502]  eta: 1:29:56  lr: 0.002511  min_lr: 0.002511  loss: 3.3021 (3.3021)  weight_decay: 0.0500 (0.0500)  time: 2.1568  data: 1.8115  max mem: 27255
Epoch: [137]  [ 200/2502]  eta: 0:13:12  lr: 0.002509  min_lr: 0.002509  loss: 3.7217 (3.4500)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7181 (0.7055)  time: 0.3408  data: 0.0004  max mem: 27255
Epoch: [137]  [ 400/2502]  eta: 0:11:53  lr: 0.002507  min_lr: 0.002507  loss: 3.0620 (3.4239)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7694 (0.7338)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [137]  [ 600/2502]  eta: 0:10:42  lr: 0.002506  min_lr: 0.002506  loss: 3.7938 (3.4208)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7058 (0.7339)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [137]  [ 800/2502]  eta: 0:09:33  lr: 0.002504  min_lr: 0.002504  loss: 3.3877 (3.4422)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6994 (0.7367)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [137]  [1000/2502]  eta: 0:08:24  lr: 0.002502  min_lr: 0.002502  loss: 3.6097 (3.4457)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7166 (0.7330)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [137]  [1200/2502]  eta: 0:07:17  lr: 0.002500  min_lr: 0.002500  loss: 3.5813 (3.4533)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7018 (0.7334)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [137]  [1400/2502]  eta: 0:06:09  lr: 0.002499  min_lr: 0.002499  loss: 3.6046 (3.4483)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6827 (0.7344)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [137]  [1600/2502]  eta: 0:05:02  lr: 0.002497  min_lr: 0.002497  loss: 3.5912 (3.4553)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7147 (0.7388)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [137]  [1800/2502]  eta: 0:03:55  lr: 0.002495  min_lr: 0.002495  loss: 3.6368 (3.4548)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7400 (0.7399)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [137]  [2000/2502]  eta: 0:02:48  lr: 0.002493  min_lr: 0.002493  loss: 3.4601 (3.4564)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6761 (0.7376)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [137]  [2200/2502]  eta: 0:01:41  lr: 0.002492  min_lr: 0.002492  loss: 3.6242 (3.4527)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7079 (0.7349)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [137]  [2400/2502]  eta: 0:00:34  lr: 0.002490  min_lr: 0.002490  loss: 3.3378 (3.4531)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6757 (0.7346)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [137]  [2501/2502]  eta: 0:00:00  lr: 0.002489  min_lr: 0.002489  loss: 3.4246 (3.4543)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7176 (0.7369)  time: 0.3014  data: 0.0007  max mem: 27255
Epoch: [137] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.002489  min_lr: 0.002489  loss: 3.4246 (3.4651)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7176 (0.7369)
Test:  [ 0/50]  eta: 0:02:09  loss: 0.5732 (0.5732)  acc1: 90.4000 (90.4000)  acc5: 99.2000 (99.2000)  time: 2.5826  data: 2.3776  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8603 (0.8957)  acc1: 82.8000 (82.5455)  acc5: 97.2000 (96.6546)  time: 0.4143  data: 0.2468  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9405 (0.9125)  acc1: 81.6000 (82.2286)  acc5: 97.2000 (96.7429)  time: 0.1890  data: 0.0252  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0913 (1.0334)  acc1: 78.0000 (79.9097)  acc5: 94.4000 (95.1226)  time: 0.1882  data: 0.0235  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3472 (1.1137)  acc1: 71.6000 (77.8439)  acc5: 91.2000 (94.2537)  time: 0.2194  data: 0.0548  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3084 (1.1328)  acc1: 71.6000 (77.2480)  acc5: 92.0000 (94.1280)  time: 0.2096  data: 0.0461  max mem: 27255
Test: Total time: 0:00:12 (0.2494 s / it)
* Acc@1 77.214 Acc@5 94.070 loss 1.136
Accuracy of the model on the 50000 test images: 77.2%
Max accuracy: 77.34%
Epoch: [138]  [   0/2502]  eta: 1:30:16  lr: 0.002489  min_lr: 0.002489  loss: 2.4941 (2.4941)  weight_decay: 0.0500 (0.0500)  time: 2.1648  data: 1.4518  max mem: 27255
Epoch: [138]  [ 200/2502]  eta: 0:13:10  lr: 0.002487  min_lr: 0.002487  loss: 3.7187 (3.4747)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7470 (0.7914)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [138]  [ 400/2502]  eta: 0:11:51  lr: 0.002486  min_lr: 0.002486  loss: 3.3495 (3.4352)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6753 (0.7386)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [138]  [ 600/2502]  eta: 0:10:42  lr: 0.002484  min_lr: 0.002484  loss: 3.6743 (3.4483)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6945 (0.7327)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [138]  [ 800/2502]  eta: 0:09:33  lr: 0.002482  min_lr: 0.002482  loss: 3.6826 (3.4628)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7267 (0.7260)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [138]  [1000/2502]  eta: 0:08:25  lr: 0.002480  min_lr: 0.002480  loss: 3.7216 (3.4697)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7303 (0.7286)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [138]  [1200/2502]  eta: 0:07:18  lr: 0.002479  min_lr: 0.002479  loss: 3.4779 (3.4698)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7046 (0.7288)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [138]  [1400/2502]  eta: 0:06:10  lr: 0.002477  min_lr: 0.002477  loss: 3.6036 (3.4684)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7018 (0.7284)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [138]  [1600/2502]  eta: 0:05:02  lr: 0.002475  min_lr: 0.002475  loss: 3.6437 (3.4627)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7392 (0.7290)  time: 0.3358  data: 0.0005  max mem: 27255
Epoch: [138]  [1800/2502]  eta: 0:03:55  lr: 0.002473  min_lr: 0.002473  loss: 3.4171 (3.4605)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6720 (0.7282)  time: 0.3416  data: 0.0004  max mem: 27255
Epoch: [138]  [2000/2502]  eta: 0:02:48  lr: 0.002472  min_lr: 0.002472  loss: 3.5212 (3.4675)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7085 (0.7266)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [138]  [2200/2502]  eta: 0:01:41  lr: 0.002470  min_lr: 0.002470  loss: 3.7175 (3.4682)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6513 (0.7237)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [138]  [2400/2502]  eta: 0:00:34  lr: 0.002468  min_lr: 0.002468  loss: 3.4549 (3.4706)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6969 (0.7252)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [138]  [2501/2502]  eta: 0:00:00  lr: 0.002467  min_lr: 0.002467  loss: 3.4717 (3.4686)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7329 (0.7265)  time: 0.3004  data: 0.0007  max mem: 27255
Epoch: [138] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.002467  min_lr: 0.002467  loss: 3.4717 (3.4625)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7329 (0.7265)
Test:  [ 0/50]  eta: 0:02:10  loss: 0.5881 (0.5881)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 2.6028  data: 2.4178  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8914 (0.9139)  acc1: 82.4000 (83.2727)  acc5: 96.8000 (96.4364)  time: 0.4462  data: 0.2774  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9002 (0.9153)  acc1: 81.6000 (82.5143)  acc5: 96.4000 (96.6667)  time: 0.2481  data: 0.0826  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0646 (1.0302)  acc1: 76.8000 (80.0000)  acc5: 94.8000 (95.1355)  time: 0.2638  data: 0.1001  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3130 (1.1009)  acc1: 74.0000 (78.3805)  acc5: 91.2000 (94.2927)  time: 0.2179  data: 0.0543  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2983 (1.1156)  acc1: 72.4000 (77.7920)  acc5: 92.0000 (94.1360)  time: 0.1687  data: 0.0052  max mem: 27255
Test: Total time: 0:00:13 (0.2698 s / it)
* Acc@1 77.446 Acc@5 94.112 loss 1.124
Accuracy of the model on the 50000 test images: 77.4%
Max accuracy: 77.45%
Epoch: [139]  [   0/2502]  eta: 1:33:09  lr: 0.002467  min_lr: 0.002467  loss: 2.6556 (2.6556)  weight_decay: 0.0500 (0.0500)  time: 2.2340  data: 1.8791  max mem: 27255
Epoch: [139]  [ 200/2502]  eta: 0:13:10  lr: 0.002466  min_lr: 0.002466  loss: 3.3733 (3.3860)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6710 (0.7415)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [139]  [ 400/2502]  eta: 0:11:54  lr: 0.002464  min_lr: 0.002464  loss: 3.5478 (3.4368)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7250 (0.7439)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [139]  [ 600/2502]  eta: 0:10:43  lr: 0.002462  min_lr: 0.002462  loss: 3.7104 (3.4399)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7156 (0.7486)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [139]  [ 800/2502]  eta: 0:09:33  lr: 0.002460  min_lr: 0.002460  loss: 3.2324 (3.4411)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6941 (0.7451)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [139]  [1000/2502]  eta: 0:08:25  lr: 0.002459  min_lr: 0.002459  loss: 3.5884 (3.4452)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7575 (0.7417)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [139]  [1200/2502]  eta: 0:07:17  lr: 0.002457  min_lr: 0.002457  loss: 3.6690 (3.4564)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7250 (0.7409)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [139]  [1400/2502]  eta: 0:06:09  lr: 0.002455  min_lr: 0.002455  loss: 3.5664 (3.4551)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6873 (0.7404)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [139]  [1600/2502]  eta: 0:05:02  lr: 0.002453  min_lr: 0.002453  loss: 3.6140 (3.4592)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6706 (0.7369)  time: 0.3407  data: 0.0003  max mem: 27255
Epoch: [139]  [1800/2502]  eta: 0:03:55  lr: 0.002452  min_lr: 0.002452  loss: 3.5033 (3.4568)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6695 (0.7374)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [139]  [2000/2502]  eta: 0:02:48  lr: 0.002450  min_lr: 0.002450  loss: 3.3403 (3.4549)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7205 (0.7354)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [139]  [2200/2502]  eta: 0:01:41  lr: 0.002448  min_lr: 0.002448  loss: 3.5206 (3.4617)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7721 (0.7402)  time: 0.3331  data: 0.0005  max mem: 27255
Epoch: [139]  [2400/2502]  eta: 0:00:34  lr: 0.002446  min_lr: 0.002446  loss: 3.2785 (3.4610)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7597 (0.7414)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [139]  [2501/2502]  eta: 0:00:00  lr: 0.002445  min_lr: 0.002445  loss: 3.5357 (3.4631)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7339 (inf)  time: 0.3013  data: 0.0009  max mem: 27255
Epoch: [139] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.002445  min_lr: 0.002445  loss: 3.5357 (3.4551)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7339 (inf)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.5690 (0.5690)  acc1: 92.0000 (92.0000)  acc5: 98.8000 (98.8000)  time: 3.3098  data: 3.1074  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9329 (0.9387)  acc1: 84.4000 (83.2000)  acc5: 96.8000 (96.6546)  time: 0.4541  data: 0.2866  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9343 (0.9486)  acc1: 81.2000 (82.0381)  acc5: 96.8000 (96.8191)  time: 0.1982  data: 0.0343  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0443 (1.0614)  acc1: 76.8000 (79.8839)  acc5: 94.8000 (95.1871)  time: 0.2524  data: 0.0868  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3687 (1.1385)  acc1: 74.0000 (78.0976)  acc5: 90.8000 (94.2244)  time: 0.2234  data: 0.0548  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3687 (1.1604)  acc1: 72.0000 (77.4320)  acc5: 91.2000 (94.1120)  time: 0.1668  data: 0.0002  max mem: 27255
Test: Total time: 0:00:13 (0.2662 s / it)
* Acc@1 77.358 Acc@5 94.156 loss 1.161
Accuracy of the model on the 50000 test images: 77.4%
Max accuracy: 77.45%
Epoch: [140]  [   0/2502]  eta: 1:33:33  lr: 0.002445  min_lr: 0.002445  loss: 2.9224 (2.9224)  weight_decay: 0.0500 (0.0500)  time: 2.2435  data: 1.7339  max mem: 27255
Epoch: [140]  [ 200/2502]  eta: 0:13:11  lr: 0.002444  min_lr: 0.002444  loss: 3.5409 (3.3986)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7142 (0.7263)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [140]  [ 400/2502]  eta: 0:11:53  lr: 0.002442  min_lr: 0.002442  loss: 3.1752 (3.4263)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7065 (0.7468)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [140]  [ 600/2502]  eta: 0:10:41  lr: 0.002440  min_lr: 0.002440  loss: 3.4431 (3.4354)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7409 (0.7566)  time: 0.3359  data: 0.0004  max mem: 27255
Epoch: [140]  [ 800/2502]  eta: 0:09:32  lr: 0.002438  min_lr: 0.002438  loss: 3.6230 (3.4358)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7073 (0.7482)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [140]  [1000/2502]  eta: 0:08:24  lr: 0.002437  min_lr: 0.002437  loss: 3.5403 (3.4399)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6908 (0.7458)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [140]  [1200/2502]  eta: 0:07:16  lr: 0.002435  min_lr: 0.002435  loss: 3.6652 (3.4459)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7275 (0.7479)  time: 0.3340  data: 0.0003  max mem: 27255
Epoch: [140]  [1400/2502]  eta: 0:06:09  lr: 0.002433  min_lr: 0.002433  loss: 3.6575 (3.4452)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7363 (0.7476)  time: 0.3422  data: 0.0004  max mem: 27255
Epoch: [140]  [1600/2502]  eta: 0:05:02  lr: 0.002431  min_lr: 0.002431  loss: 3.7516 (3.4450)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7740 (0.7474)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [140]  [1800/2502]  eta: 0:03:55  lr: 0.002430  min_lr: 0.002430  loss: 3.2721 (3.4446)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7570 (0.7481)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [140]  [2000/2502]  eta: 0:02:48  lr: 0.002428  min_lr: 0.002428  loss: 3.4134 (3.4424)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6586 (0.7439)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [140]  [2200/2502]  eta: 0:01:41  lr: 0.002426  min_lr: 0.002426  loss: 3.4516 (3.4407)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8249 (0.7487)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [140]  [2400/2502]  eta: 0:00:34  lr: 0.002424  min_lr: 0.002424  loss: 3.6910 (3.4474)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7521 (0.7473)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [140]  [2501/2502]  eta: 0:00:00  lr: 0.002424  min_lr: 0.002424  loss: 3.3802 (3.4467)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6975 (0.7452)  time: 0.2995  data: 0.0007  max mem: 27255
Epoch: [140] Total time: 0:13:57 (0.3345 s / it)
Averaged stats: lr: 0.002424  min_lr: 0.002424  loss: 3.3802 (3.4456)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6975 (0.7452)
Test:  [ 0/50]  eta: 0:02:16  loss: 0.5795 (0.5795)  acc1: 92.0000 (92.0000)  acc5: 98.4000 (98.4000)  time: 2.7257  data: 2.5300  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8967 (0.9444)  acc1: 84.4000 (82.7636)  acc5: 97.2000 (96.6909)  time: 0.4045  data: 0.2376  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9538 (0.9748)  acc1: 80.8000 (82.0191)  acc5: 97.2000 (96.7048)  time: 0.1936  data: 0.0296  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1363 (1.0841)  acc1: 78.0000 (79.9871)  acc5: 93.2000 (95.1613)  time: 0.2248  data: 0.0597  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3045 (1.1475)  acc1: 74.8000 (78.3415)  acc5: 91.2000 (94.4293)  time: 0.2148  data: 0.0471  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3045 (1.1734)  acc1: 72.0000 (77.5520)  acc5: 92.0000 (94.2640)  time: 0.1812  data: 0.0129  max mem: 27255
Test: Total time: 0:00:12 (0.2496 s / it)
* Acc@1 77.238 Acc@5 94.194 loss 1.184
Accuracy of the model on the 50000 test images: 77.2%
Max accuracy: 77.45%
Epoch: [141]  [   0/2502]  eta: 1:27:04  lr: 0.002424  min_lr: 0.002424  loss: 3.9642 (3.9642)  weight_decay: 0.0500 (0.0500)  time: 2.0880  data: 1.4861  max mem: 27255
Epoch: [141]  [ 200/2502]  eta: 0:13:10  lr: 0.002422  min_lr: 0.002422  loss: 3.6358 (3.4565)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7541 (0.7179)  time: 0.3415  data: 0.0004  max mem: 27255
Epoch: [141]  [ 400/2502]  eta: 0:11:51  lr: 0.002420  min_lr: 0.002420  loss: 3.6520 (3.4395)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6847 (0.7148)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [141]  [ 600/2502]  eta: 0:10:40  lr: 0.002418  min_lr: 0.002418  loss: 3.6682 (3.4276)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7050 (0.7153)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [141]  [ 800/2502]  eta: 0:09:32  lr: 0.002417  min_lr: 0.002417  loss: 3.3089 (3.4322)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6559 (0.7174)  time: 0.3362  data: 0.0005  max mem: 27255
Epoch: [141]  [1000/2502]  eta: 0:08:24  lr: 0.002415  min_lr: 0.002415  loss: 3.6676 (3.4337)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6733 (0.7188)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [141]  [1200/2502]  eta: 0:07:17  lr: 0.002413  min_lr: 0.002413  loss: 3.7148 (3.4451)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7187 (0.7225)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [141]  [1400/2502]  eta: 0:06:09  lr: 0.002411  min_lr: 0.002411  loss: 3.4687 (3.4442)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7500 (0.7259)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [141]  [1600/2502]  eta: 0:05:02  lr: 0.002409  min_lr: 0.002409  loss: 3.7310 (3.4460)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7033 (0.7285)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [141]  [1800/2502]  eta: 0:03:55  lr: 0.002408  min_lr: 0.002408  loss: 3.5605 (3.4462)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7116 (0.7303)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [141]  [2000/2502]  eta: 0:02:48  lr: 0.002406  min_lr: 0.002406  loss: 3.2464 (3.4484)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6913 (0.7322)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [141]  [2200/2502]  eta: 0:01:41  lr: 0.002404  min_lr: 0.002404  loss: 3.6237 (3.4505)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7609 (0.7346)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [141]  [2400/2502]  eta: 0:00:34  lr: 0.002402  min_lr: 0.002402  loss: 3.7293 (3.4502)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7657 (0.7357)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [141]  [2501/2502]  eta: 0:00:00  lr: 0.002402  min_lr: 0.002402  loss: 3.4687 (3.4475)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7103 (0.7356)  time: 0.3023  data: 0.0010  max mem: 27255
Epoch: [141] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.002402  min_lr: 0.002402  loss: 3.4687 (3.4438)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7103 (0.7356)
Test:  [ 0/50]  eta: 0:02:47  loss: 0.7221 (0.7221)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.3573  data: 3.1578  max mem: 27255
Test:  [10/50]  eta: 0:00:21  loss: 1.0054 (1.0384)  acc1: 83.6000 (83.0909)  acc5: 96.8000 (96.9455)  time: 0.5339  data: 0.3660  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 1.0721 (1.0619)  acc1: 81.6000 (82.2476)  acc5: 97.2000 (97.0095)  time: 0.2207  data: 0.0559  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1628 (1.1625)  acc1: 77.6000 (80.0516)  acc5: 95.2000 (95.4065)  time: 0.1832  data: 0.0127  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4340 (1.2351)  acc1: 72.8000 (78.0683)  acc5: 91.2000 (94.5756)  time: 0.1736  data: 0.0020  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4070 (1.2580)  acc1: 71.6000 (77.3120)  acc5: 91.6000 (94.3120)  time: 0.1679  data: 0.0018  max mem: 27255
Test: Total time: 0:00:12 (0.2560 s / it)
* Acc@1 77.026 Acc@5 94.168 loss 1.263
Accuracy of the model on the 50000 test images: 77.0%
Max accuracy: 77.45%
Epoch: [142]  [   0/2502]  eta: 1:38:09  lr: 0.002402  min_lr: 0.002402  loss: 3.8433 (3.8433)  weight_decay: 0.0500 (0.0500)  time: 2.3539  data: 1.8726  max mem: 27255
Epoch: [142]  [ 200/2502]  eta: 0:13:16  lr: 0.002400  min_lr: 0.002400  loss: 3.4366 (3.3974)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6818 (0.7205)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [142]  [ 400/2502]  eta: 0:11:54  lr: 0.002398  min_lr: 0.002398  loss: 3.5967 (3.4116)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7329 (0.7376)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [142]  [ 600/2502]  eta: 0:10:43  lr: 0.002396  min_lr: 0.002396  loss: 3.2117 (3.4071)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7296 (0.7444)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [142]  [ 800/2502]  eta: 0:09:33  lr: 0.002395  min_lr: 0.002395  loss: 3.6088 (3.4211)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6933 (0.7361)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [142]  [1000/2502]  eta: 0:08:25  lr: 0.002393  min_lr: 0.002393  loss: 3.5802 (3.4294)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6811 (0.7377)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [142]  [1200/2502]  eta: 0:07:17  lr: 0.002391  min_lr: 0.002391  loss: 3.5670 (3.4386)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6607 (0.7356)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [142]  [1400/2502]  eta: 0:06:10  lr: 0.002389  min_lr: 0.002389  loss: 3.6451 (3.4423)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7894 (0.7395)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [142]  [1600/2502]  eta: 0:05:03  lr: 0.002388  min_lr: 0.002388  loss: 3.2786 (3.4364)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7346 (0.7438)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [142]  [1800/2502]  eta: 0:03:55  lr: 0.002386  min_lr: 0.002386  loss: 3.3658 (3.4324)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7451 (0.7423)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [142]  [2000/2502]  eta: 0:02:48  lr: 0.002384  min_lr: 0.002384  loss: 3.4164 (3.4352)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7065 (0.7396)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [142]  [2200/2502]  eta: 0:01:41  lr: 0.002382  min_lr: 0.002382  loss: 3.6226 (3.4320)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7038 (0.7387)  time: 0.3372  data: 0.0005  max mem: 27255
Epoch: [142]  [2400/2502]  eta: 0:00:34  lr: 0.002380  min_lr: 0.002380  loss: 3.4563 (3.4362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7162 (0.7362)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [142]  [2501/2502]  eta: 0:00:00  lr: 0.002380  min_lr: 0.002380  loss: 3.5751 (3.4378)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7431 (0.7365)  time: 0.3007  data: 0.0009  max mem: 27255
Epoch: [142] Total time: 0:13:59 (0.3356 s / it)
Averaged stats: lr: 0.002380  min_lr: 0.002380  loss: 3.5751 (3.4360)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7431 (0.7365)
Test:  [ 0/50]  eta: 0:02:40  loss: 0.6457 (0.6457)  acc1: 92.4000 (92.4000)  acc5: 98.4000 (98.4000)  time: 3.2142  data: 3.0075  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9248 (0.9733)  acc1: 83.6000 (83.1273)  acc5: 97.2000 (96.5818)  time: 0.4670  data: 0.2991  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9740 (0.9907)  acc1: 81.6000 (82.2857)  acc5: 96.8000 (96.5714)  time: 0.2051  data: 0.0410  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1256 (1.0941)  acc1: 77.6000 (80.2194)  acc5: 94.4000 (95.2000)  time: 0.2013  data: 0.0373  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3927 (1.1664)  acc1: 73.2000 (78.3024)  acc5: 91.6000 (94.2537)  time: 0.2116  data: 0.0461  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3403 (1.1865)  acc1: 72.8000 (77.4800)  acc5: 92.0000 (94.1040)  time: 0.2015  data: 0.0361  max mem: 27255
Test: Total time: 0:00:13 (0.2621 s / it)
* Acc@1 77.286 Acc@5 94.134 loss 1.190
Accuracy of the model on the 50000 test images: 77.3%
Max accuracy: 77.45%
Epoch: [143]  [   0/2502]  eta: 1:35:25  lr: 0.002380  min_lr: 0.002380  loss: 2.3125 (2.3125)  weight_decay: 0.0500 (0.0500)  time: 2.2884  data: 1.9315  max mem: 27255
Epoch: [143]  [ 200/2502]  eta: 0:13:13  lr: 0.002378  min_lr: 0.002378  loss: 3.6873 (3.4461)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6929 (0.7271)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [143]  [ 400/2502]  eta: 0:11:52  lr: 0.002376  min_lr: 0.002376  loss: 3.3600 (3.4506)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7280 (0.7262)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [143]  [ 600/2502]  eta: 0:10:41  lr: 0.002374  min_lr: 0.002374  loss: 3.7474 (3.4595)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7909 (0.7410)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [143]  [ 800/2502]  eta: 0:09:32  lr: 0.002373  min_lr: 0.002373  loss: 3.5873 (3.4524)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7011 (0.7399)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [143]  [1000/2502]  eta: 0:08:24  lr: 0.002371  min_lr: 0.002371  loss: 3.5779 (3.4478)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6829 (0.7401)  time: 0.3376  data: 0.0005  max mem: 27255
Epoch: [143]  [1200/2502]  eta: 0:07:17  lr: 0.002369  min_lr: 0.002369  loss: 3.5331 (3.4568)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7006 (0.7332)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [143]  [1400/2502]  eta: 0:06:09  lr: 0.002367  min_lr: 0.002367  loss: 3.6404 (3.4526)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7729 (0.7373)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [143]  [1600/2502]  eta: 0:05:02  lr: 0.002365  min_lr: 0.002365  loss: 3.4731 (3.4508)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7378 (0.7373)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [143]  [1800/2502]  eta: 0:03:55  lr: 0.002364  min_lr: 0.002364  loss: 3.6857 (3.4537)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7179 (0.7387)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [143]  [2000/2502]  eta: 0:02:48  lr: 0.002362  min_lr: 0.002362  loss: 3.6744 (3.4576)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7153 (0.7362)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [143]  [2200/2502]  eta: 0:01:41  lr: 0.002360  min_lr: 0.002360  loss: 3.6320 (3.4584)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7298 (0.7355)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [143]  [2400/2502]  eta: 0:00:34  lr: 0.002358  min_lr: 0.002358  loss: 3.6503 (3.4610)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7644 (0.7362)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [143]  [2501/2502]  eta: 0:00:00  lr: 0.002358  min_lr: 0.002358  loss: 2.8830 (3.4604)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7300 (0.7367)  time: 0.3014  data: 0.0009  max mem: 27255
Epoch: [143] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.002358  min_lr: 0.002358  loss: 2.8830 (3.4403)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7300 (0.7367)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.4543 (0.4543)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 2.8182  data: 2.6146  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8453 (0.8276)  acc1: 83.2000 (83.6000)  acc5: 97.6000 (96.9455)  time: 0.4203  data: 0.2510  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8477 (0.8597)  acc1: 82.0000 (82.2286)  acc5: 96.8000 (96.8000)  time: 0.2003  data: 0.0355  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9848 (0.9580)  acc1: 78.8000 (80.4903)  acc5: 94.4000 (95.4839)  time: 0.2218  data: 0.0568  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2283 (1.0297)  acc1: 74.4000 (78.7512)  acc5: 91.6000 (94.6146)  time: 0.2090  data: 0.0441  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2283 (1.0540)  acc1: 74.4000 (77.9200)  acc5: 91.6000 (94.4000)  time: 0.1791  data: 0.0156  max mem: 27255
Test: Total time: 0:00:12 (0.2516 s / it)
* Acc@1 77.830 Acc@5 94.316 loss 1.061
Accuracy of the model on the 50000 test images: 77.8%
Max accuracy: 77.83%
Epoch: [144]  [   0/2502]  eta: 1:36:44  lr: 0.002358  min_lr: 0.002358  loss: 2.3279 (2.3279)  weight_decay: 0.0500 (0.0500)  time: 2.3200  data: 1.9839  max mem: 27255
Epoch: [144]  [ 200/2502]  eta: 0:13:14  lr: 0.002356  min_lr: 0.002356  loss: 3.6349 (3.4035)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7616 (0.7425)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [144]  [ 400/2502]  eta: 0:11:53  lr: 0.002354  min_lr: 0.002354  loss: 3.6159 (3.4295)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7300 (0.7359)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [144]  [ 600/2502]  eta: 0:10:42  lr: 0.002352  min_lr: 0.002352  loss: 3.4891 (3.4271)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8228 (0.7501)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [144]  [ 800/2502]  eta: 0:09:33  lr: 0.002350  min_lr: 0.002350  loss: 3.4847 (3.4247)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7088 (0.7464)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [144]  [1000/2502]  eta: 0:08:25  lr: 0.002349  min_lr: 0.002349  loss: 3.5950 (3.4335)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7385 (0.7516)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [144]  [1200/2502]  eta: 0:07:17  lr: 0.002347  min_lr: 0.002347  loss: 3.6903 (3.4360)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7894 (0.7529)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [144]  [1400/2502]  eta: 0:06:10  lr: 0.002345  min_lr: 0.002345  loss: 3.4946 (3.4312)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7771 (0.7503)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [144]  [1600/2502]  eta: 0:05:02  lr: 0.002343  min_lr: 0.002343  loss: 3.4123 (3.4273)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6912 (0.7460)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [144]  [1800/2502]  eta: 0:03:55  lr: 0.002342  min_lr: 0.002342  loss: 3.2650 (3.4276)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7722 (0.7476)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [144]  [2000/2502]  eta: 0:02:48  lr: 0.002340  min_lr: 0.002340  loss: 3.4388 (3.4264)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7110 (0.7434)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [144]  [2200/2502]  eta: 0:01:41  lr: 0.002338  min_lr: 0.002338  loss: 3.4096 (3.4288)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7495 (0.7420)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [144]  [2400/2502]  eta: 0:00:34  lr: 0.002336  min_lr: 0.002336  loss: 3.5833 (3.4326)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7194 (0.7436)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [144]  [2501/2502]  eta: 0:00:00  lr: 0.002335  min_lr: 0.002335  loss: 3.5587 (3.4341)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7418 (0.7443)  time: 0.3000  data: 0.0011  max mem: 27255
Epoch: [144] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.002335  min_lr: 0.002335  loss: 3.5587 (3.4295)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7418 (0.7443)
Test:  [ 0/50]  eta: 0:03:05  loss: 0.6291 (0.6291)  acc1: 91.6000 (91.6000)  acc5: 98.8000 (98.8000)  time: 3.7140  data: 3.5157  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 0.9320 (0.9549)  acc1: 85.6000 (83.3091)  acc5: 97.2000 (97.1636)  time: 0.5152  data: 0.3469  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.9606 (0.9820)  acc1: 81.6000 (82.2476)  acc5: 97.2000 (96.9143)  time: 0.2092  data: 0.0392  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1914 (1.0934)  acc1: 77.6000 (80.1806)  acc5: 94.0000 (95.4065)  time: 0.2268  data: 0.0576  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3509 (1.1685)  acc1: 73.2000 (78.2927)  acc5: 91.6000 (94.5659)  time: 0.2045  data: 0.0409  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3509 (1.1878)  acc1: 73.2000 (77.5360)  acc5: 91.6000 (94.3600)  time: 0.2043  data: 0.0407  max mem: 27255
Test: Total time: 0:00:13 (0.2713 s / it)
* Acc@1 77.490 Acc@5 94.226 loss 1.195
Accuracy of the model on the 50000 test images: 77.5%
Max accuracy: 77.83%
Epoch: [145]  [   0/2502]  eta: 1:30:47  lr: 0.002335  min_lr: 0.002335  loss: 3.6090 (3.6090)  weight_decay: 0.0500 (0.0500)  time: 2.1771  data: 1.8310  max mem: 27255
Epoch: [145]  [ 200/2502]  eta: 0:13:10  lr: 0.002334  min_lr: 0.002334  loss: 3.6368 (3.3921)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7188 (0.7534)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [145]  [ 400/2502]  eta: 0:11:53  lr: 0.002332  min_lr: 0.002332  loss: 3.3828 (3.4015)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7512 (0.7363)  time: 0.3358  data: 0.0003  max mem: 27255
Epoch: [145]  [ 600/2502]  eta: 0:10:42  lr: 0.002330  min_lr: 0.002330  loss: 3.4849 (3.4052)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6955 (0.7342)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [145]  [ 800/2502]  eta: 0:09:33  lr: 0.002328  min_lr: 0.002328  loss: 3.4614 (3.4219)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6851 (0.7253)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [145]  [1000/2502]  eta: 0:08:24  lr: 0.002327  min_lr: 0.002327  loss: 3.6020 (3.4204)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7068 (0.7302)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [145]  [1200/2502]  eta: 0:07:17  lr: 0.002325  min_lr: 0.002325  loss: 3.5441 (3.4180)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7720 (0.7373)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [145]  [1400/2502]  eta: 0:06:09  lr: 0.002323  min_lr: 0.002323  loss: 3.0898 (3.4157)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7358 (0.7397)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [145]  [1600/2502]  eta: 0:05:02  lr: 0.002321  min_lr: 0.002321  loss: 3.5213 (3.4136)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7459 (0.7403)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [145]  [1800/2502]  eta: 0:03:55  lr: 0.002319  min_lr: 0.002319  loss: 3.6948 (3.4249)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6866 (0.7412)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [145]  [2000/2502]  eta: 0:02:48  lr: 0.002318  min_lr: 0.002318  loss: 3.5225 (3.4245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7165 (0.7414)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [145]  [2200/2502]  eta: 0:01:41  lr: 0.002316  min_lr: 0.002316  loss: 3.6765 (3.4210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7185 (0.7415)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [145]  [2400/2502]  eta: 0:00:34  lr: 0.002314  min_lr: 0.002314  loss: 3.5869 (3.4253)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7553 (0.7427)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [145]  [2501/2502]  eta: 0:00:00  lr: 0.002313  min_lr: 0.002313  loss: 3.5934 (3.4274)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7408 (0.7441)  time: 0.2999  data: 0.0008  max mem: 27255
Epoch: [145] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.002313  min_lr: 0.002313  loss: 3.5934 (3.4256)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7408 (0.7441)
Test:  [ 0/50]  eta: 0:02:37  loss: 0.6283 (0.6283)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 3.1535  data: 2.9674  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.8522 (0.8958)  acc1: 84.4000 (83.4545)  acc5: 97.2000 (96.6546)  time: 0.4604  data: 0.2943  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8989 (0.9196)  acc1: 83.2000 (82.3429)  acc5: 96.8000 (96.5905)  time: 0.1927  data: 0.0287  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0668 (1.0223)  acc1: 78.0000 (80.0645)  acc5: 94.8000 (95.1484)  time: 0.2155  data: 0.0514  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2766 (1.0951)  acc1: 72.0000 (78.1951)  acc5: 92.0000 (94.3610)  time: 0.2151  data: 0.0498  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3379 (1.1186)  acc1: 71.6000 (77.4320)  acc5: 92.0000 (94.1280)  time: 0.1793  data: 0.0137  max mem: 27255
Test: Total time: 0:00:12 (0.2579 s / it)
* Acc@1 77.446 Acc@5 94.216 loss 1.117
Accuracy of the model on the 50000 test images: 77.4%
Max accuracy: 77.83%
Epoch: [146]  [   0/2502]  eta: 1:19:19  lr: 0.002313  min_lr: 0.002313  loss: 2.4952 (2.4952)  weight_decay: 0.0500 (0.0500)  time: 1.9022  data: 1.5620  max mem: 27255
Epoch: [146]  [ 200/2502]  eta: 0:13:09  lr: 0.002312  min_lr: 0.002312  loss: 3.6174 (3.4738)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7729 (0.7799)  time: 0.3407  data: 0.0004  max mem: 27255
Epoch: [146]  [ 400/2502]  eta: 0:11:51  lr: 0.002310  min_lr: 0.002310  loss: 3.5857 (3.4734)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7828 (0.7792)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [146]  [ 600/2502]  eta: 0:10:41  lr: 0.002308  min_lr: 0.002308  loss: 3.8438 (3.4892)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7221 (0.7639)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [146]  [ 800/2502]  eta: 0:09:33  lr: 0.002306  min_lr: 0.002306  loss: 3.5192 (3.4907)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7274 (0.7594)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [146]  [1000/2502]  eta: 0:08:25  lr: 0.002304  min_lr: 0.002304  loss: 3.5121 (3.4761)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6990 (0.7583)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [146]  [1200/2502]  eta: 0:07:17  lr: 0.002303  min_lr: 0.002303  loss: 3.3308 (3.4532)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7301 (0.7603)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [146]  [1400/2502]  eta: 0:06:09  lr: 0.002301  min_lr: 0.002301  loss: 3.2654 (3.4368)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7760 (0.7601)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [146]  [1600/2502]  eta: 0:05:02  lr: 0.002299  min_lr: 0.002299  loss: 3.4810 (3.4359)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6786 (0.7592)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [146]  [1800/2502]  eta: 0:03:55  lr: 0.002297  min_lr: 0.002297  loss: 3.6803 (3.4329)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7539 (0.7561)  time: 0.3345  data: 0.0003  max mem: 27255
Epoch: [146]  [2000/2502]  eta: 0:02:48  lr: 0.002296  min_lr: 0.002296  loss: 3.5008 (3.4384)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7453 (0.7560)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [146]  [2200/2502]  eta: 0:01:41  lr: 0.002294  min_lr: 0.002294  loss: 3.3379 (3.4347)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7718 (0.7573)  time: 0.3409  data: 0.0003  max mem: 27255
Epoch: [146]  [2400/2502]  eta: 0:00:34  lr: 0.002292  min_lr: 0.002292  loss: 3.5446 (3.4327)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7331 (0.7565)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [146]  [2501/2502]  eta: 0:00:00  lr: 0.002291  min_lr: 0.002291  loss: 3.6573 (3.4339)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6695 (0.7541)  time: 0.3058  data: 0.0009  max mem: 27255
Epoch: [146] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.002291  min_lr: 0.002291  loss: 3.6573 (3.4187)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6695 (0.7541)
Test:  [ 0/50]  eta: 0:02:22  loss: 0.6446 (0.6446)  acc1: 90.8000 (90.8000)  acc5: 100.0000 (100.0000)  time: 2.8568  data: 2.6657  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9221 (0.9502)  acc1: 84.0000 (83.7455)  acc5: 98.0000 (97.2364)  time: 0.4113  data: 0.2447  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.9798 (0.9871)  acc1: 81.6000 (82.5714)  acc5: 97.2000 (97.0667)  time: 0.2541  data: 0.0901  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1807 (1.0882)  acc1: 77.2000 (80.2323)  acc5: 94.8000 (95.3677)  time: 0.2800  data: 0.1161  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3596 (1.1512)  acc1: 72.8000 (78.3415)  acc5: 91.2000 (94.6049)  time: 0.2040  data: 0.0402  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3787 (1.1718)  acc1: 72.4000 (77.7280)  acc5: 92.0000 (94.4400)  time: 0.1768  data: 0.0131  max mem: 27255
Test: Total time: 0:00:13 (0.2718 s / it)
* Acc@1 77.660 Acc@5 94.304 loss 1.177
Accuracy of the model on the 50000 test images: 77.7%
Max accuracy: 77.83%
Epoch: [147]  [   0/2502]  eta: 1:38:15  lr: 0.002291  min_lr: 0.002291  loss: 4.0600 (4.0600)  weight_decay: 0.0500 (0.0500)  time: 2.3562  data: 1.5731  max mem: 27255
Epoch: [147]  [ 200/2502]  eta: 0:13:12  lr: 0.002289  min_lr: 0.002289  loss: 3.4694 (3.3441)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7171 (0.7494)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [147]  [ 400/2502]  eta: 0:11:53  lr: 0.002288  min_lr: 0.002288  loss: 3.2997 (3.3578)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7443 (0.7647)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [147]  [ 600/2502]  eta: 0:10:42  lr: 0.002286  min_lr: 0.002286  loss: 3.5867 (3.3822)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7119 (0.7631)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [147]  [ 800/2502]  eta: 0:09:33  lr: 0.002284  min_lr: 0.002284  loss: 3.3552 (3.3836)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7003 (nan)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [147]  [1000/2502]  eta: 0:08:25  lr: 0.002282  min_lr: 0.002282  loss: 3.5846 (3.3940)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7084 (nan)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [147]  [1200/2502]  eta: 0:07:17  lr: 0.002280  min_lr: 0.002280  loss: 3.3630 (3.3999)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6618 (nan)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [147]  [1400/2502]  eta: 0:06:09  lr: 0.002279  min_lr: 0.002279  loss: 3.4696 (3.4016)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7617 (nan)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [147]  [1600/2502]  eta: 0:05:02  lr: 0.002277  min_lr: 0.002277  loss: 3.5034 (3.4049)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7233 (nan)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [147]  [1800/2502]  eta: 0:03:55  lr: 0.002275  min_lr: 0.002275  loss: 3.6830 (3.4060)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7311 (nan)  time: 0.3448  data: 0.0005  max mem: 27255
Epoch: [147]  [2000/2502]  eta: 0:02:48  lr: 0.002273  min_lr: 0.002273  loss: 3.4211 (3.4100)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6946 (nan)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [147]  [2200/2502]  eta: 0:01:41  lr: 0.002272  min_lr: 0.002272  loss: 3.4033 (3.4113)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7138 (nan)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [147]  [2400/2502]  eta: 0:00:34  lr: 0.002270  min_lr: 0.002270  loss: 3.4200 (3.4071)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7282 (nan)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [147]  [2501/2502]  eta: 0:00:00  lr: 0.002269  min_lr: 0.002269  loss: 3.6908 (3.4106)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6936 (nan)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [147] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.002269  min_lr: 0.002269  loss: 3.6908 (3.4108)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6936 (nan)
Test:  [ 0/50]  eta: 0:02:14  loss: 0.6338 (0.6338)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 2.6803  data: 2.4807  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9337 (0.9708)  acc1: 84.0000 (84.0364)  acc5: 96.8000 (96.8727)  time: 0.4067  data: 0.2395  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9842 (1.0042)  acc1: 82.8000 (82.8571)  acc5: 96.8000 (96.8191)  time: 0.2016  data: 0.0360  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2078 (1.1169)  acc1: 77.2000 (80.5548)  acc5: 93.6000 (95.1742)  time: 0.2287  data: 0.0612  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4247 (1.1960)  acc1: 73.2000 (78.5561)  acc5: 92.0000 (94.4585)  time: 0.2247  data: 0.0555  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4079 (1.2155)  acc1: 72.4000 (77.9280)  acc5: 92.4000 (94.3280)  time: 0.1897  data: 0.0227  max mem: 27255
Test: Total time: 0:00:12 (0.2553 s / it)
* Acc@1 77.726 Acc@5 94.368 loss 1.217
Accuracy of the model on the 50000 test images: 77.7%
Max accuracy: 77.83%
Epoch: [148]  [   0/2502]  eta: 1:43:15  lr: 0.002269  min_lr: 0.002269  loss: 2.4383 (2.4383)  weight_decay: 0.0500 (0.0500)  time: 2.4763  data: 2.1178  max mem: 27255
Epoch: [148]  [ 200/2502]  eta: 0:13:15  lr: 0.002267  min_lr: 0.002267  loss: 3.3847 (3.4427)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7430 (0.7410)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [148]  [ 400/2502]  eta: 0:11:56  lr: 0.002265  min_lr: 0.002265  loss: 3.0444 (3.4147)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7345 (0.7415)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [148]  [ 600/2502]  eta: 0:10:43  lr: 0.002264  min_lr: 0.002264  loss: 3.5134 (3.4233)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7724 (0.7506)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [148]  [ 800/2502]  eta: 0:09:33  lr: 0.002262  min_lr: 0.002262  loss: 3.6068 (3.4192)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7875 (0.7572)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [148]  [1000/2502]  eta: 0:08:25  lr: 0.002260  min_lr: 0.002260  loss: 2.9608 (3.4093)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7895 (0.7649)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [148]  [1200/2502]  eta: 0:07:17  lr: 0.002258  min_lr: 0.002258  loss: 3.4647 (3.4061)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7259 (0.7588)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [148]  [1400/2502]  eta: 0:06:10  lr: 0.002256  min_lr: 0.002256  loss: 3.3392 (3.4077)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6939 (0.7549)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [148]  [1600/2502]  eta: 0:05:02  lr: 0.002255  min_lr: 0.002255  loss: 3.7993 (3.4148)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7552 (0.7560)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [148]  [1800/2502]  eta: 0:03:55  lr: 0.002253  min_lr: 0.002253  loss: 3.2694 (3.4147)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7227 (0.7584)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [148]  [2000/2502]  eta: 0:02:48  lr: 0.002251  min_lr: 0.002251  loss: 3.7730 (3.4199)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8153 (0.7628)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [148]  [2200/2502]  eta: 0:01:41  lr: 0.002249  min_lr: 0.002249  loss: 3.1748 (3.4189)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7969 (0.7636)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [148]  [2400/2502]  eta: 0:00:34  lr: 0.002248  min_lr: 0.002248  loss: 3.2303 (3.4152)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7754 (0.7641)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [148]  [2501/2502]  eta: 0:00:00  lr: 0.002247  min_lr: 0.002247  loss: 3.4040 (3.4141)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7391 (0.7631)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [148] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.002247  min_lr: 0.002247  loss: 3.4040 (3.4244)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7391 (0.7631)
Test:  [ 0/50]  eta: 0:02:08  loss: 0.6702 (0.6702)  acc1: 91.6000 (91.6000)  acc5: 98.8000 (98.8000)  time: 2.5791  data: 2.3812  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9691 (0.9995)  acc1: 81.6000 (83.0909)  acc5: 96.4000 (96.0727)  time: 0.4080  data: 0.2411  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.9691 (0.9896)  acc1: 81.2000 (82.1524)  acc5: 96.8000 (96.5714)  time: 0.2635  data: 0.0997  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1677 (1.0967)  acc1: 76.4000 (79.8323)  acc5: 94.4000 (95.1355)  time: 0.3014  data: 0.1375  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 1.3404 (1.1601)  acc1: 73.2000 (78.1268)  acc5: 91.6000 (94.3317)  time: 0.2500  data: 0.0862  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3404 (1.1815)  acc1: 73.2000 (77.5280)  acc5: 92.0000 (94.1680)  time: 0.1986  data: 0.0349  max mem: 27255
Test: Total time: 0:00:14 (0.2888 s / it)
* Acc@1 77.528 Acc@5 94.388 loss 1.178
Accuracy of the model on the 50000 test images: 77.5%
Max accuracy: 77.83%
Epoch: [149]  [   0/2502]  eta: 1:30:15  lr: 0.002247  min_lr: 0.002247  loss: 3.7461 (3.7461)  weight_decay: 0.0500 (0.0500)  time: 2.1645  data: 1.6826  max mem: 27255
Epoch: [149]  [ 200/2502]  eta: 0:13:08  lr: 0.002245  min_lr: 0.002245  loss: 3.2907 (3.4098)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7076 (0.7209)  time: 0.3331  data: 0.0003  max mem: 27255
Epoch: [149]  [ 400/2502]  eta: 0:11:52  lr: 0.002243  min_lr: 0.002243  loss: 3.4923 (3.3929)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7314 (0.7492)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [149]  [ 600/2502]  eta: 0:10:40  lr: 0.002241  min_lr: 0.002241  loss: 3.4818 (3.4054)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7703 (0.7437)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [149]  [ 800/2502]  eta: 0:09:31  lr: 0.002240  min_lr: 0.002240  loss: 3.3143 (3.4062)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7309 (0.7510)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [149]  [1000/2502]  eta: 0:08:24  lr: 0.002238  min_lr: 0.002238  loss: 3.2339 (3.3933)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7051 (0.7467)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [149]  [1200/2502]  eta: 0:07:16  lr: 0.002236  min_lr: 0.002236  loss: 3.3639 (3.3953)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7442 (0.7480)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [149]  [1400/2502]  eta: 0:06:09  lr: 0.002234  min_lr: 0.002234  loss: 3.6347 (3.3973)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7626 (0.7442)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [149]  [1600/2502]  eta: 0:05:02  lr: 0.002232  min_lr: 0.002232  loss: 3.5607 (3.4021)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7372 (0.7443)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [149]  [1800/2502]  eta: 0:03:55  lr: 0.002231  min_lr: 0.002231  loss: 3.5584 (3.4040)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7165 (0.7427)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [149]  [2000/2502]  eta: 0:02:48  lr: 0.002229  min_lr: 0.002229  loss: 3.2331 (3.4039)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7025 (0.7417)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [149]  [2200/2502]  eta: 0:01:41  lr: 0.002227  min_lr: 0.002227  loss: 3.6565 (3.4090)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7144 (0.7438)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [149]  [2400/2502]  eta: 0:00:34  lr: 0.002225  min_lr: 0.002225  loss: 3.5563 (3.4087)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7476 (0.7476)  time: 0.3342  data: 0.0003  max mem: 27255
Epoch: [149]  [2501/2502]  eta: 0:00:00  lr: 0.002224  min_lr: 0.002224  loss: 3.2948 (3.4069)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6813 (0.7467)  time: 0.2997  data: 0.0007  max mem: 27255
Epoch: [149] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.002224  min_lr: 0.002224  loss: 3.2948 (3.4096)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6813 (0.7467)
Test:  [ 0/50]  eta: 0:02:11  loss: 0.5787 (0.5787)  acc1: 90.4000 (90.4000)  acc5: 99.2000 (99.2000)  time: 2.6245  data: 2.4310  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9123 (0.9063)  acc1: 83.6000 (83.0182)  acc5: 97.2000 (96.6546)  time: 0.4438  data: 0.2771  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9168 (0.9221)  acc1: 80.8000 (82.3810)  acc5: 96.8000 (96.8000)  time: 0.2196  data: 0.0557  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1151 (1.0214)  acc1: 78.4000 (80.1032)  acc5: 94.8000 (95.2645)  time: 0.2357  data: 0.0719  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3005 (1.0910)  acc1: 73.2000 (78.4098)  acc5: 91.6000 (94.4781)  time: 0.2161  data: 0.0514  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3005 (1.1063)  acc1: 73.2000 (77.9520)  acc5: 91.6000 (94.3920)  time: 0.1690  data: 0.0044  max mem: 27255
Test: Total time: 0:00:12 (0.2582 s / it)
* Acc@1 77.934 Acc@5 94.456 loss 1.108
Accuracy of the model on the 50000 test images: 77.9%
Max accuracy: 77.93%
Epoch: [150]  [   0/2502]  eta: 1:28:30  lr: 0.002224  min_lr: 0.002224  loss: 3.5744 (3.5744)  weight_decay: 0.0500 (0.0500)  time: 2.1225  data: 1.7663  max mem: 27255
Epoch: [150]  [ 200/2502]  eta: 0:13:11  lr: 0.002223  min_lr: 0.002223  loss: 3.5676 (3.3983)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7565 (0.7363)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [150]  [ 400/2502]  eta: 0:11:52  lr: 0.002221  min_lr: 0.002221  loss: 3.4043 (3.4043)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7402 (0.7474)  time: 0.3350  data: 0.0009  max mem: 27255
Epoch: [150]  [ 600/2502]  eta: 0:10:42  lr: 0.002219  min_lr: 0.002219  loss: 3.4674 (3.4064)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7588 (0.7531)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [150]  [ 800/2502]  eta: 0:09:33  lr: 0.002217  min_lr: 0.002217  loss: 3.4466 (3.4034)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7274 (nan)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [150]  [1000/2502]  eta: 0:08:25  lr: 0.002215  min_lr: 0.002215  loss: 3.4475 (3.4046)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7432 (nan)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [150]  [1200/2502]  eta: 0:07:17  lr: 0.002214  min_lr: 0.002214  loss: 3.3605 (3.3939)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7015 (nan)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [150]  [1400/2502]  eta: 0:06:09  lr: 0.002212  min_lr: 0.002212  loss: 3.4805 (3.3925)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7794 (nan)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [150]  [1600/2502]  eta: 0:05:02  lr: 0.002210  min_lr: 0.002210  loss: 3.5391 (3.3938)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7613 (nan)  time: 0.3423  data: 0.0005  max mem: 27255
Epoch: [150]  [1800/2502]  eta: 0:03:55  lr: 0.002208  min_lr: 0.002208  loss: 3.3182 (3.3955)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7525 (nan)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [150]  [2000/2502]  eta: 0:02:48  lr: 0.002207  min_lr: 0.002207  loss: 3.4508 (3.3988)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7166 (nan)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [150]  [2200/2502]  eta: 0:01:41  lr: 0.002205  min_lr: 0.002205  loss: 3.5942 (3.4007)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6913 (nan)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [150]  [2400/2502]  eta: 0:00:34  lr: 0.002203  min_lr: 0.002203  loss: 3.5107 (3.4013)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7236 (nan)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [150]  [2501/2502]  eta: 0:00:00  lr: 0.002202  min_lr: 0.002202  loss: 3.6692 (3.3988)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7621 (nan)  time: 0.3048  data: 0.0010  max mem: 27255
Epoch: [150] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.002202  min_lr: 0.002202  loss: 3.6692 (3.4080)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7621 (nan)
Test:  [ 0/50]  eta: 0:02:22  loss: 0.6629 (0.6629)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 2.8591  data: 2.6620  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.0299 (1.0436)  acc1: 83.2000 (83.0182)  acc5: 96.4000 (96.5091)  time: 0.4367  data: 0.2691  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0658 (1.0710)  acc1: 81.2000 (82.1905)  acc5: 96.4000 (96.5524)  time: 0.2130  data: 0.0470  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2449 (1.1768)  acc1: 76.4000 (79.9097)  acc5: 94.0000 (94.8645)  time: 0.2214  data: 0.0551  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3903 (1.2360)  acc1: 74.0000 (78.2049)  acc5: 92.0000 (94.2146)  time: 0.1937  data: 0.0272  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3679 (1.2566)  acc1: 74.0000 (77.6640)  acc5: 92.4000 (94.0640)  time: 0.1764  data: 0.0085  max mem: 27255
Test: Total time: 0:00:12 (0.2521 s / it)
* Acc@1 77.632 Acc@5 94.264 loss 1.255
Accuracy of the model on the 50000 test images: 77.6%
Max accuracy: 77.93%
Epoch: [151]  [   0/2502]  eta: 1:18:28  lr: 0.002202  min_lr: 0.002202  loss: 3.4967 (3.4967)  weight_decay: 0.0500 (0.0500)  time: 1.8820  data: 1.5578  max mem: 27255
Epoch: [151]  [ 200/2502]  eta: 0:13:09  lr: 0.002200  min_lr: 0.002200  loss: 3.3531 (3.3517)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7706 (0.7730)  time: 0.3409  data: 0.0004  max mem: 27255
Epoch: [151]  [ 400/2502]  eta: 0:11:52  lr: 0.002198  min_lr: 0.002198  loss: 3.3708 (3.4009)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7518 (0.7620)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [151]  [ 600/2502]  eta: 0:10:42  lr: 0.002197  min_lr: 0.002197  loss: 3.4945 (3.3938)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7630 (0.7638)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [151]  [ 800/2502]  eta: 0:09:33  lr: 0.002195  min_lr: 0.002195  loss: 3.5459 (3.4081)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8682 (0.7696)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [151]  [1000/2502]  eta: 0:08:25  lr: 0.002193  min_lr: 0.002193  loss: 3.4249 (3.4120)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6894 (0.7665)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [151]  [1200/2502]  eta: 0:07:17  lr: 0.002191  min_lr: 0.002191  loss: 3.5591 (3.4137)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7166 (0.7625)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [151]  [1400/2502]  eta: 0:06:10  lr: 0.002190  min_lr: 0.002190  loss: 3.5965 (3.4115)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7490 (0.7709)  time: 0.3360  data: 0.0004  max mem: 27255
Epoch: [151]  [1600/2502]  eta: 0:05:03  lr: 0.002188  min_lr: 0.002188  loss: 3.0681 (3.4083)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7789 (0.7673)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [151]  [1800/2502]  eta: 0:03:55  lr: 0.002186  min_lr: 0.002186  loss: 3.5970 (3.4048)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7320 (0.7660)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [151]  [2000/2502]  eta: 0:02:48  lr: 0.002184  min_lr: 0.002184  loss: 3.0505 (3.4009)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6850 (0.7636)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [151]  [2200/2502]  eta: 0:01:41  lr: 0.002182  min_lr: 0.002182  loss: 3.3704 (3.4080)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8004 (0.7657)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [151]  [2400/2502]  eta: 0:00:34  lr: 0.002181  min_lr: 0.002181  loss: 3.5457 (3.4022)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7166 (0.7628)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [151]  [2501/2502]  eta: 0:00:00  lr: 0.002180  min_lr: 0.002180  loss: 3.4288 (3.4016)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7004 (0.7616)  time: 0.3000  data: 0.0008  max mem: 27255
Epoch: [151] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.002180  min_lr: 0.002180  loss: 3.4288 (3.4059)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7004 (0.7616)
Test:  [ 0/50]  eta: 0:02:24  loss: 0.6658 (0.6658)  acc1: 92.0000 (92.0000)  acc5: 98.8000 (98.8000)  time: 2.8831  data: 2.6937  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8699 (0.9131)  acc1: 84.8000 (83.5636)  acc5: 97.2000 (97.0909)  time: 0.4114  data: 0.2452  max mem: 27255
Test:  [20/50]  eta: 0:00:08  loss: 0.8903 (0.9279)  acc1: 81.6000 (82.4381)  acc5: 96.8000 (96.8381)  time: 0.1686  data: 0.0047  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0339 (1.0271)  acc1: 77.2000 (80.3355)  acc5: 94.0000 (95.5742)  time: 0.1727  data: 0.0088  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2885 (1.0939)  acc1: 73.6000 (78.6829)  acc5: 92.4000 (94.8000)  time: 0.2044  data: 0.0405  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2348 (1.1114)  acc1: 73.6000 (78.1360)  acc5: 92.8000 (94.5520)  time: 0.2033  data: 0.0393  max mem: 27255
Test: Total time: 0:00:11 (0.2392 s / it)
* Acc@1 77.902 Acc@5 94.474 loss 1.116
Accuracy of the model on the 50000 test images: 77.9%
Max accuracy: 77.93%
Epoch: [152]  [   0/2502]  eta: 2:14:43  lr: 0.002180  min_lr: 0.002180  loss: 3.7794 (3.7794)  weight_decay: 0.0500 (0.0500)  time: 3.2307  data: 2.2111  max mem: 27255
Epoch: [152]  [ 200/2502]  eta: 0:13:32  lr: 0.002178  min_lr: 0.002178  loss: 3.3944 (3.3954)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7321 (0.7672)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [152]  [ 400/2502]  eta: 0:12:02  lr: 0.002176  min_lr: 0.002176  loss: 3.3436 (3.3919)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7371 (0.7718)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [152]  [ 600/2502]  eta: 0:10:47  lr: 0.002174  min_lr: 0.002174  loss: 3.2874 (3.3956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7264 (0.7630)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [152]  [ 800/2502]  eta: 0:09:36  lr: 0.002173  min_lr: 0.002173  loss: 3.6003 (3.4000)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7480 (0.7825)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [152]  [1000/2502]  eta: 0:08:27  lr: 0.002171  min_lr: 0.002171  loss: 3.6311 (3.3960)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7353 (0.7830)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [152]  [1200/2502]  eta: 0:07:18  lr: 0.002169  min_lr: 0.002169  loss: 3.4597 (3.3886)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7120 (0.7759)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [152]  [1400/2502]  eta: 0:06:10  lr: 0.002167  min_lr: 0.002167  loss: 3.5266 (3.3835)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7982 (0.7752)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [152]  [1600/2502]  eta: 0:05:03  lr: 0.002165  min_lr: 0.002165  loss: 3.2842 (3.3943)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7300 (0.7746)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [152]  [1800/2502]  eta: 0:03:55  lr: 0.002164  min_lr: 0.002164  loss: 3.6546 (3.3943)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8017 (0.7761)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [152]  [2000/2502]  eta: 0:02:48  lr: 0.002162  min_lr: 0.002162  loss: 3.4573 (3.3981)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7867 (0.7738)  time: 0.3364  data: 0.0004  max mem: 27255
Epoch: [152]  [2200/2502]  eta: 0:01:41  lr: 0.002160  min_lr: 0.002160  loss: 3.6291 (3.3965)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7069 (0.7731)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [152]  [2400/2502]  eta: 0:00:34  lr: 0.002158  min_lr: 0.002158  loss: 3.5772 (3.3944)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6704 (0.7708)  time: 0.3386  data: 0.0004  max mem: 27255
Epoch: [152]  [2501/2502]  eta: 0:00:00  lr: 0.002157  min_lr: 0.002157  loss: 3.7676 (3.3969)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7750 (0.7705)  time: 0.3004  data: 0.0009  max mem: 27255
Epoch: [152] Total time: 0:13:59 (0.3356 s / it)
Averaged stats: lr: 0.002157  min_lr: 0.002157  loss: 3.7676 (3.3984)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7750 (0.7705)
Test:  [ 0/50]  eta: 0:02:17  loss: 0.6837 (0.6837)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 2.7449  data: 2.5414  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 1.0683 (1.0515)  acc1: 81.6000 (83.0545)  acc5: 97.2000 (96.8000)  time: 0.4274  data: 0.2572  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0683 (1.0709)  acc1: 81.6000 (82.1714)  acc5: 96.8000 (96.8571)  time: 0.2182  data: 0.0523  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.2056 (1.1707)  acc1: 78.4000 (80.3226)  acc5: 94.4000 (95.5484)  time: 0.2585  data: 0.0942  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4610 (1.2428)  acc1: 73.6000 (78.2732)  acc5: 92.0000 (94.7415)  time: 0.2201  data: 0.0563  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4105 (1.2641)  acc1: 72.4000 (77.5920)  acc5: 92.0000 (94.5360)  time: 0.1638  data: 0.0001  max mem: 27255
Test: Total time: 0:00:13 (0.2616 s / it)
* Acc@1 77.722 Acc@5 94.556 loss 1.264
Accuracy of the model on the 50000 test images: 77.7%
Max accuracy: 77.93%
Epoch: [153]  [   0/2502]  eta: 1:29:29  lr: 0.002157  min_lr: 0.002157  loss: 3.9133 (3.9133)  weight_decay: 0.0500 (0.0500)  time: 2.1459  data: 1.5281  max mem: 27255
Epoch: [153]  [ 200/2502]  eta: 0:13:10  lr: 0.002156  min_lr: 0.002156  loss: 3.3466 (3.3488)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7333 (0.7582)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [153]  [ 400/2502]  eta: 0:11:51  lr: 0.002154  min_lr: 0.002154  loss: 3.5701 (3.3643)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7819 (0.7916)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [153]  [ 600/2502]  eta: 0:10:41  lr: 0.002152  min_lr: 0.002152  loss: 3.5005 (3.3796)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7878 (0.7949)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [153]  [ 800/2502]  eta: 0:09:32  lr: 0.002150  min_lr: 0.002150  loss: 3.5049 (3.3914)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7629 (0.7854)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [153]  [1000/2502]  eta: 0:08:24  lr: 0.002148  min_lr: 0.002148  loss: 3.4126 (3.3827)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7583 (0.7861)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [153]  [1200/2502]  eta: 0:07:17  lr: 0.002147  min_lr: 0.002147  loss: 3.6381 (3.3893)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7662 (0.7826)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [153]  [1400/2502]  eta: 0:06:09  lr: 0.002145  min_lr: 0.002145  loss: 3.4308 (3.3900)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.6789 (0.7773)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [153]  [1600/2502]  eta: 0:05:03  lr: 0.002143  min_lr: 0.002143  loss: 3.4794 (3.3931)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7272 (0.7759)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [153]  [1800/2502]  eta: 0:03:55  lr: 0.002141  min_lr: 0.002141  loss: 3.4391 (3.3877)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7557 (0.7780)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [153]  [2000/2502]  eta: 0:02:48  lr: 0.002139  min_lr: 0.002139  loss: 3.1124 (3.3942)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7250 (0.7763)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [153]  [2200/2502]  eta: 0:01:41  lr: 0.002138  min_lr: 0.002138  loss: 3.2487 (3.3927)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8146 (0.7830)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [153]  [2400/2502]  eta: 0:00:34  lr: 0.002136  min_lr: 0.002136  loss: 3.4675 (3.3916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7409 (0.7803)  time: 0.3356  data: 0.0005  max mem: 27255
Epoch: [153]  [2501/2502]  eta: 0:00:00  lr: 0.002135  min_lr: 0.002135  loss: 3.5525 (3.3911)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7109 (0.7783)  time: 0.3000  data: 0.0009  max mem: 27255
Epoch: [153] Total time: 0:13:59 (0.3353 s / it)
Averaged stats: lr: 0.002135  min_lr: 0.002135  loss: 3.5525 (3.3969)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7109 (0.7783)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.6832 (0.6832)  acc1: 92.8000 (92.8000)  acc5: 99.2000 (99.2000)  time: 3.3004  data: 3.1176  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9250 (0.9579)  acc1: 84.4000 (83.8545)  acc5: 96.8000 (96.7636)  time: 0.4670  data: 0.3014  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.9694 (0.9806)  acc1: 82.0000 (82.3810)  acc5: 96.8000 (96.8762)  time: 0.2245  data: 0.0605  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1682 (1.0822)  acc1: 78.0000 (80.4387)  acc5: 94.8000 (95.5226)  time: 0.2220  data: 0.0580  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3255 (1.1546)  acc1: 72.8000 (78.3902)  acc5: 92.0000 (94.7024)  time: 0.1785  data: 0.0147  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3764 (1.1747)  acc1: 72.4000 (77.8480)  acc5: 92.4000 (94.4960)  time: 0.1782  data: 0.0145  max mem: 27255
Test: Total time: 0:00:12 (0.2585 s / it)
* Acc@1 77.912 Acc@5 94.536 loss 1.177
Accuracy of the model on the 50000 test images: 77.9%
Max accuracy: 77.93%
Epoch: [154]  [   0/2502]  eta: 1:29:55  lr: 0.002135  min_lr: 0.002135  loss: 4.0739 (4.0739)  weight_decay: 0.0500 (0.0500)  time: 2.1565  data: 1.8102  max mem: 27255
Epoch: [154]  [ 200/2502]  eta: 0:13:12  lr: 0.002133  min_lr: 0.002133  loss: 3.5154 (3.3449)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7831 (0.7758)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [154]  [ 400/2502]  eta: 0:11:53  lr: 0.002131  min_lr: 0.002131  loss: 3.6167 (3.3678)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8010 (0.7878)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [154]  [ 600/2502]  eta: 0:10:42  lr: 0.002130  min_lr: 0.002130  loss: 3.5842 (3.3851)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7700 (0.7779)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [154]  [ 800/2502]  eta: 0:09:33  lr: 0.002128  min_lr: 0.002128  loss: 3.5660 (3.3965)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8131 (0.7787)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [154]  [1000/2502]  eta: 0:08:25  lr: 0.002126  min_lr: 0.002126  loss: 3.2988 (3.3813)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7204 (0.7743)  time: 0.3361  data: 0.0005  max mem: 27255
Epoch: [154]  [1200/2502]  eta: 0:07:17  lr: 0.002124  min_lr: 0.002124  loss: 3.4472 (3.3851)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8352 (0.7733)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [154]  [1400/2502]  eta: 0:06:10  lr: 0.002122  min_lr: 0.002122  loss: 3.4211 (3.3919)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7356 (0.7782)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [154]  [1600/2502]  eta: 0:05:03  lr: 0.002121  min_lr: 0.002121  loss: 3.5357 (3.3954)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7540 (0.7789)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [154]  [1800/2502]  eta: 0:03:55  lr: 0.002119  min_lr: 0.002119  loss: 3.4496 (3.3924)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7661 (0.7786)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [154]  [2000/2502]  eta: 0:02:48  lr: 0.002117  min_lr: 0.002117  loss: 3.6897 (3.3961)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7573 (0.7784)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [154]  [2200/2502]  eta: 0:01:41  lr: 0.002115  min_lr: 0.002115  loss: 3.5533 (3.3975)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7705 (0.7763)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [154]  [2400/2502]  eta: 0:00:34  lr: 0.002114  min_lr: 0.002114  loss: 3.5555 (3.3989)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8213 (0.7780)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [154]  [2501/2502]  eta: 0:00:00  lr: 0.002113  min_lr: 0.002113  loss: 3.2392 (3.3973)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7002 (0.7762)  time: 0.2999  data: 0.0007  max mem: 27255
Epoch: [154] Total time: 0:13:59 (0.3356 s / it)
Averaged stats: lr: 0.002113  min_lr: 0.002113  loss: 3.2392 (3.3900)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7002 (0.7762)
Test:  [ 0/50]  eta: 0:02:28  loss: 0.5691 (0.5691)  acc1: 91.2000 (91.2000)  acc5: 99.2000 (99.2000)  time: 2.9716  data: 2.7691  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9141 (0.9197)  acc1: 84.4000 (83.1636)  acc5: 97.2000 (96.8727)  time: 0.4688  data: 0.3006  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9141 (0.9309)  acc1: 82.0000 (82.5143)  acc5: 97.2000 (97.0286)  time: 0.1939  data: 0.0296  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0618 (1.0113)  acc1: 79.2000 (80.7226)  acc5: 95.2000 (95.7677)  time: 0.1766  data: 0.0128  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2631 (1.0774)  acc1: 73.6000 (78.7415)  acc5: 92.4000 (94.8781)  time: 0.1990  data: 0.0352  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2631 (1.0974)  acc1: 72.8000 (77.9120)  acc5: 92.8000 (94.7360)  time: 0.1987  data: 0.0351  max mem: 27255
Test: Total time: 0:00:12 (0.2481 s / it)
* Acc@1 78.032 Acc@5 94.610 loss 1.102
Accuracy of the model on the 50000 test images: 78.0%
Max accuracy: 78.03%
Epoch: [155]  [   0/2502]  eta: 1:19:04  lr: 0.002113  min_lr: 0.002113  loss: 3.7516 (3.7516)  weight_decay: 0.0500 (0.0500)  time: 1.8963  data: 1.5446  max mem: 27255
Epoch: [155]  [ 200/2502]  eta: 0:13:07  lr: 0.002111  min_lr: 0.002111  loss: 3.3323 (3.3707)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7318 (0.7464)  time: 0.3359  data: 0.0005  max mem: 27255
Epoch: [155]  [ 400/2502]  eta: 0:11:51  lr: 0.002109  min_lr: 0.002109  loss: 3.3516 (3.3471)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7756 (0.7625)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [155]  [ 600/2502]  eta: 0:10:41  lr: 0.002107  min_lr: 0.002107  loss: 3.5846 (3.3693)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7422 (0.7616)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [155]  [ 800/2502]  eta: 0:09:32  lr: 0.002105  min_lr: 0.002105  loss: 3.5624 (3.3733)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7492 (0.7610)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [155]  [1000/2502]  eta: 0:08:24  lr: 0.002104  min_lr: 0.002104  loss: 3.7819 (3.3781)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7863 (0.7726)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [155]  [1200/2502]  eta: 0:07:16  lr: 0.002102  min_lr: 0.002102  loss: 3.2904 (3.3801)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7614 (0.7771)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [155]  [1400/2502]  eta: 0:06:09  lr: 0.002100  min_lr: 0.002100  loss: 3.3891 (3.3834)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7319 (0.7738)  time: 0.3408  data: 0.0003  max mem: 27255
Epoch: [155]  [1600/2502]  eta: 0:05:02  lr: 0.002098  min_lr: 0.002098  loss: 3.6610 (3.3816)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7710 (0.7753)  time: 0.3384  data: 0.0004  max mem: 27255
Epoch: [155]  [1800/2502]  eta: 0:03:55  lr: 0.002096  min_lr: 0.002096  loss: 3.5899 (3.3806)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7340 (0.7740)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [155]  [2000/2502]  eta: 0:02:48  lr: 0.002095  min_lr: 0.002095  loss: 3.5351 (3.3842)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8258 (0.7758)  time: 0.3326  data: 0.0003  max mem: 27255
Epoch: [155]  [2200/2502]  eta: 0:01:41  lr: 0.002093  min_lr: 0.002093  loss: 3.4493 (3.3910)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7562 (0.7761)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [155]  [2400/2502]  eta: 0:00:34  lr: 0.002091  min_lr: 0.002091  loss: 3.5102 (3.3953)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7539 (0.7772)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [155]  [2501/2502]  eta: 0:00:00  lr: 0.002090  min_lr: 0.002090  loss: 3.5627 (3.3981)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7450 (0.7778)  time: 0.2996  data: 0.0007  max mem: 27255
Epoch: [155] Total time: 0:13:57 (0.3346 s / it)
Averaged stats: lr: 0.002090  min_lr: 0.002090  loss: 3.5627 (3.3838)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7450 (0.7778)
Test:  [ 0/50]  eta: 0:02:42  loss: 0.5754 (0.5754)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.2456  data: 3.0437  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9821 (0.9991)  acc1: 83.2000 (83.1636)  acc5: 96.8000 (96.6909)  time: 0.4576  data: 0.2887  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0186 (1.0194)  acc1: 80.4000 (82.2857)  acc5: 96.8000 (96.8000)  time: 0.1741  data: 0.0069  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2131 (1.1317)  acc1: 79.2000 (79.8968)  acc5: 94.0000 (95.3806)  time: 0.1785  data: 0.0087  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3987 (1.1999)  acc1: 73.2000 (78.1171)  acc5: 92.4000 (94.5854)  time: 0.1947  data: 0.0274  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3829 (1.2190)  acc1: 73.2000 (77.4560)  acc5: 92.4000 (94.4800)  time: 0.1877  data: 0.0237  max mem: 27255
Test: Total time: 0:00:12 (0.2461 s / it)
* Acc@1 77.634 Acc@5 94.454 loss 1.219
Accuracy of the model on the 50000 test images: 77.6%
Max accuracy: 78.03%
Epoch: [156]  [   0/2502]  eta: 1:30:48  lr: 0.002090  min_lr: 0.002090  loss: 3.1467 (3.1467)  weight_decay: 0.0500 (0.0500)  time: 2.1778  data: 1.3892  max mem: 27255
Epoch: [156]  [ 200/2502]  eta: 0:13:10  lr: 0.002088  min_lr: 0.002088  loss: 3.4273 (3.3323)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7242 (0.7721)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [156]  [ 400/2502]  eta: 0:11:51  lr: 0.002087  min_lr: 0.002087  loss: 3.3655 (3.3580)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7302 (0.7768)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [156]  [ 600/2502]  eta: 0:10:41  lr: 0.002085  min_lr: 0.002085  loss: 3.4772 (3.3581)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7523 (0.7782)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [156]  [ 800/2502]  eta: 0:09:33  lr: 0.002083  min_lr: 0.002083  loss: 3.5545 (3.3740)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7507 (0.7879)  time: 0.3425  data: 0.0006  max mem: 27255
Epoch: [156]  [1000/2502]  eta: 0:08:25  lr: 0.002081  min_lr: 0.002081  loss: 3.2983 (3.3793)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7787 (0.7896)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [156]  [1200/2502]  eta: 0:07:17  lr: 0.002079  min_lr: 0.002079  loss: 3.5227 (3.3868)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7748 (0.7904)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [156]  [1400/2502]  eta: 0:06:10  lr: 0.002078  min_lr: 0.002078  loss: 3.3327 (3.3810)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7567 (0.7918)  time: 0.3410  data: 0.0004  max mem: 27255
Epoch: [156]  [1600/2502]  eta: 0:05:02  lr: 0.002076  min_lr: 0.002076  loss: 3.1593 (3.3742)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7166 (0.7894)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [156]  [1800/2502]  eta: 0:03:55  lr: 0.002074  min_lr: 0.002074  loss: 3.5891 (3.3836)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8235 (inf)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [156]  [2000/2502]  eta: 0:02:48  lr: 0.002072  min_lr: 0.002072  loss: 3.4917 (3.3858)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8168 (inf)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [156]  [2200/2502]  eta: 0:01:41  lr: 0.002070  min_lr: 0.002070  loss: 3.5233 (3.3821)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7548 (inf)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [156]  [2400/2502]  eta: 0:00:34  lr: 0.002069  min_lr: 0.002069  loss: 3.3296 (3.3861)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7684 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [156]  [2501/2502]  eta: 0:00:00  lr: 0.002068  min_lr: 0.002068  loss: 3.7792 (3.3869)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7830 (inf)  time: 0.2999  data: 0.0012  max mem: 27255
Epoch: [156] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.002068  min_lr: 0.002068  loss: 3.7792 (3.3829)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7830 (inf)
Test:  [ 0/50]  eta: 0:02:37  loss: 0.8322 (0.8322)  acc1: 90.4000 (90.4000)  acc5: 99.2000 (99.2000)  time: 3.1431  data: 2.9415  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9779 (1.0504)  acc1: 83.6000 (83.7091)  acc5: 97.2000 (96.6546)  time: 0.4428  data: 0.2754  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 1.0612 (1.0776)  acc1: 81.6000 (82.6095)  acc5: 97.2000 (96.7429)  time: 0.1740  data: 0.0101  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2563 (1.1846)  acc1: 77.6000 (80.5032)  acc5: 94.8000 (95.4710)  time: 0.2000  data: 0.0356  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4331 (1.2506)  acc1: 74.8000 (78.7610)  acc5: 92.0000 (94.6342)  time: 0.2050  data: 0.0409  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.4259 (1.2664)  acc1: 73.2000 (78.2640)  acc5: 92.4000 (94.5280)  time: 0.1836  data: 0.0201  max mem: 27255
Test: Total time: 0:00:12 (0.2496 s / it)
* Acc@1 78.022 Acc@5 94.444 loss 1.275
Accuracy of the model on the 50000 test images: 78.0%
Max accuracy: 78.03%
Epoch: [157]  [   0/2502]  eta: 1:22:55  lr: 0.002068  min_lr: 0.002068  loss: 3.7549 (3.7549)  weight_decay: 0.0500 (0.0500)  time: 1.9886  data: 1.3920  max mem: 27255
Epoch: [157]  [ 200/2502]  eta: 0:13:08  lr: 0.002066  min_lr: 0.002066  loss: 3.3285 (3.3784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7818 (0.8204)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [157]  [ 400/2502]  eta: 0:11:52  lr: 0.002064  min_lr: 0.002064  loss: 3.4195 (3.3743)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7467 (0.8097)  time: 0.3436  data: 0.0004  max mem: 27255
Epoch: [157]  [ 600/2502]  eta: 0:10:42  lr: 0.002062  min_lr: 0.002062  loss: 3.5530 (3.3899)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7341 (0.8076)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [157]  [ 800/2502]  eta: 0:09:33  lr: 0.002061  min_lr: 0.002061  loss: 3.3887 (3.3887)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7492 (0.7978)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [157]  [1000/2502]  eta: 0:08:24  lr: 0.002059  min_lr: 0.002059  loss: 3.4811 (3.3861)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7577 (0.7897)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [157]  [1200/2502]  eta: 0:07:17  lr: 0.002057  min_lr: 0.002057  loss: 3.4948 (3.3765)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7856 (0.7915)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [157]  [1400/2502]  eta: 0:06:10  lr: 0.002055  min_lr: 0.002055  loss: 3.4720 (3.3743)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7666 (0.7889)  time: 0.3326  data: 0.0005  max mem: 27255
Epoch: [157]  [1600/2502]  eta: 0:05:02  lr: 0.002053  min_lr: 0.002053  loss: 3.3129 (3.3766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7271 (0.7879)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [157]  [1800/2502]  eta: 0:03:55  lr: 0.002052  min_lr: 0.002052  loss: 3.2951 (3.3788)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7663 (0.7906)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [157]  [2000/2502]  eta: 0:02:48  lr: 0.002050  min_lr: 0.002050  loss: 3.5048 (3.3764)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7289 (0.7928)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [157]  [2200/2502]  eta: 0:01:41  lr: 0.002048  min_lr: 0.002048  loss: 3.6599 (3.3792)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7809 (0.7900)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [157]  [2400/2502]  eta: 0:00:34  lr: 0.002046  min_lr: 0.002046  loss: 3.5063 (3.3789)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8218 (0.7939)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [157]  [2501/2502]  eta: 0:00:00  lr: 0.002045  min_lr: 0.002045  loss: 3.4924 (3.3757)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7593 (0.7927)  time: 0.3005  data: 0.0007  max mem: 27255
Epoch: [157] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.002045  min_lr: 0.002045  loss: 3.4924 (3.3759)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7593 (0.7927)
Test:  [ 0/50]  eta: 0:02:27  loss: 0.6315 (0.6315)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 2.9500  data: 2.7463  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9512 (0.9889)  acc1: 83.6000 (83.8545)  acc5: 97.2000 (97.1273)  time: 0.4192  data: 0.2508  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9985 (1.0093)  acc1: 82.4000 (83.0286)  acc5: 96.8000 (97.1048)  time: 0.1836  data: 0.0192  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1469 (1.1098)  acc1: 78.4000 (80.7226)  acc5: 95.2000 (95.6129)  time: 0.2098  data: 0.0459  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3850 (1.1768)  acc1: 73.2000 (78.8585)  acc5: 92.0000 (94.8585)  time: 0.2067  data: 0.0429  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3850 (1.2000)  acc1: 72.4000 (78.1760)  acc5: 92.8000 (94.6960)  time: 0.2062  data: 0.0426  max mem: 27255
Test: Total time: 0:00:12 (0.2468 s / it)
* Acc@1 78.126 Acc@5 94.520 loss 1.206
Accuracy of the model on the 50000 test images: 78.1%
Max accuracy: 78.13%
Epoch: [158]  [   0/2502]  eta: 1:43:54  lr: 0.002045  min_lr: 0.002045  loss: 3.6562 (3.6562)  weight_decay: 0.0500 (0.0500)  time: 2.4919  data: 2.1419  max mem: 27255
Epoch: [158]  [ 200/2502]  eta: 0:13:13  lr: 0.002044  min_lr: 0.002044  loss: 3.3915 (3.3479)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7575 (0.7816)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [158]  [ 400/2502]  eta: 0:11:53  lr: 0.002042  min_lr: 0.002042  loss: 3.5302 (3.3688)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8401 (0.7981)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [158]  [ 600/2502]  eta: 0:10:42  lr: 0.002040  min_lr: 0.002040  loss: 3.4352 (3.3573)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7293 (0.7885)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [158]  [ 800/2502]  eta: 0:09:33  lr: 0.002038  min_lr: 0.002038  loss: 3.4422 (3.3609)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7798 (0.7928)  time: 0.3331  data: 0.0005  max mem: 27255
Epoch: [158]  [1000/2502]  eta: 0:08:25  lr: 0.002036  min_lr: 0.002036  loss: 3.4762 (3.3653)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7871 (0.7880)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [158]  [1200/2502]  eta: 0:07:17  lr: 0.002035  min_lr: 0.002035  loss: 3.4685 (3.3682)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7973 (0.7881)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [158]  [1400/2502]  eta: 0:06:10  lr: 0.002033  min_lr: 0.002033  loss: 3.3147 (3.3673)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7818 (0.7924)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [158]  [1600/2502]  eta: 0:05:02  lr: 0.002031  min_lr: 0.002031  loss: 3.3285 (3.3685)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8015 (0.7954)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [158]  [1800/2502]  eta: 0:03:55  lr: 0.002029  min_lr: 0.002029  loss: 3.4880 (3.3723)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7355 (0.7935)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [158]  [2000/2502]  eta: 0:02:48  lr: 0.002027  min_lr: 0.002027  loss: 3.6862 (3.3744)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7881 (0.7929)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [158]  [2200/2502]  eta: 0:01:41  lr: 0.002026  min_lr: 0.002026  loss: 3.4017 (3.3766)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8834 (0.7939)  time: 0.3424  data: 0.0004  max mem: 27255
Epoch: [158]  [2400/2502]  eta: 0:00:34  lr: 0.002024  min_lr: 0.002024  loss: 3.3973 (3.3782)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7406 (0.7930)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [158]  [2501/2502]  eta: 0:00:00  lr: 0.002023  min_lr: 0.002023  loss: 3.3173 (3.3790)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7835 (0.7936)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [158] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.002023  min_lr: 0.002023  loss: 3.3173 (3.3805)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7835 (0.7936)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.5780 (0.5780)  acc1: 92.0000 (92.0000)  acc5: 98.8000 (98.8000)  time: 3.0525  data: 2.8473  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 0.9107 (0.9217)  acc1: 83.6000 (83.1273)  acc5: 97.2000 (96.9818)  time: 0.5146  data: 0.3452  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.9107 (0.9282)  acc1: 82.8000 (82.7238)  acc5: 97.2000 (96.9143)  time: 0.2500  data: 0.0815  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1270 (1.0331)  acc1: 78.8000 (80.5806)  acc5: 94.0000 (95.5226)  time: 0.2398  data: 0.0712  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2981 (1.0985)  acc1: 73.2000 (78.7707)  acc5: 92.0000 (94.8390)  time: 0.2048  data: 0.0400  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2970 (1.1129)  acc1: 73.2000 (78.2880)  acc5: 92.4000 (94.7120)  time: 0.2034  data: 0.0398  max mem: 27255
Test: Total time: 0:00:13 (0.2745 s / it)
* Acc@1 78.392 Acc@5 94.728 loss 1.117
Accuracy of the model on the 50000 test images: 78.4%
Max accuracy: 78.39%
Epoch: [159]  [   0/2502]  eta: 1:17:34  lr: 0.002023  min_lr: 0.002023  loss: 3.5703 (3.5703)  weight_decay: 0.0500 (0.0500)  time: 1.8603  data: 1.5167  max mem: 27255
Epoch: [159]  [ 200/2502]  eta: 0:13:06  lr: 0.002021  min_lr: 0.002021  loss: 3.3159 (3.3648)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8688 (0.8201)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [159]  [ 400/2502]  eta: 0:11:50  lr: 0.002019  min_lr: 0.002019  loss: 3.3550 (3.3383)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7870 (0.8379)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [159]  [ 600/2502]  eta: 0:10:42  lr: 0.002018  min_lr: 0.002018  loss: 3.4868 (3.3593)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7489 (0.8206)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [159]  [ 800/2502]  eta: 0:09:33  lr: 0.002016  min_lr: 0.002016  loss: 3.4899 (3.3593)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7643 (0.8124)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [159]  [1000/2502]  eta: 0:08:25  lr: 0.002014  min_lr: 0.002014  loss: 3.5113 (3.3726)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8086 (0.8145)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [159]  [1200/2502]  eta: 0:07:17  lr: 0.002012  min_lr: 0.002012  loss: 3.5061 (3.3729)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7850 (0.8087)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [159]  [1400/2502]  eta: 0:06:10  lr: 0.002010  min_lr: 0.002010  loss: 3.3137 (3.3815)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8307 (0.8056)  time: 0.3371  data: 0.0005  max mem: 27255
Epoch: [159]  [1600/2502]  eta: 0:05:03  lr: 0.002009  min_lr: 0.002009  loss: 3.4772 (3.3821)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7515 (0.8018)  time: 0.3436  data: 0.0005  max mem: 27255
Epoch: [159]  [1800/2502]  eta: 0:03:55  lr: 0.002007  min_lr: 0.002007  loss: 3.4179 (3.3821)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8063 (0.8002)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [159]  [2000/2502]  eta: 0:02:48  lr: 0.002005  min_lr: 0.002005  loss: 3.2644 (3.3775)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7857 (0.8013)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [159]  [2200/2502]  eta: 0:01:41  lr: 0.002003  min_lr: 0.002003  loss: 3.4815 (3.3829)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [159]  [2400/2502]  eta: 0:00:34  lr: 0.002001  min_lr: 0.002001  loss: 3.5582 (3.3808)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7569 (nan)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [159]  [2501/2502]  eta: 0:00:00  lr: 0.002001  min_lr: 0.002001  loss: 3.4585 (3.3798)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8486 (nan)  time: 0.3007  data: 0.0008  max mem: 27255
Epoch: [159] Total time: 0:13:59 (0.3356 s / it)
Averaged stats: lr: 0.002001  min_lr: 0.002001  loss: 3.4585 (3.3773)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8486 (nan)
Test:  [ 0/50]  eta: 0:02:26  loss: 0.6467 (0.6467)  acc1: 91.6000 (91.6000)  acc5: 99.2000 (99.2000)  time: 2.9388  data: 2.7331  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8768 (0.9088)  acc1: 83.6000 (83.7455)  acc5: 97.2000 (97.0909)  time: 0.4180  data: 0.2502  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9474 (0.9444)  acc1: 81.6000 (82.9905)  acc5: 96.8000 (97.0286)  time: 0.2262  data: 0.0622  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0527 (1.0461)  acc1: 78.4000 (80.7742)  acc5: 95.6000 (95.5097)  time: 0.2617  data: 0.0977  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3047 (1.1135)  acc1: 75.2000 (79.0732)  acc5: 92.0000 (94.7317)  time: 0.2053  data: 0.0402  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2876 (1.1307)  acc1: 74.0000 (78.3920)  acc5: 92.4000 (94.5680)  time: 0.1687  data: 0.0038  max mem: 27255
Test: Total time: 0:00:13 (0.2630 s / it)
* Acc@1 78.288 Acc@5 94.594 loss 1.138
Accuracy of the model on the 50000 test images: 78.3%
Max accuracy: 78.39%
Epoch: [160]  [   0/2502]  eta: 1:31:50  lr: 0.002001  min_lr: 0.002001  loss: 2.8977 (2.8977)  weight_decay: 0.0500 (0.0500)  time: 2.2026  data: 1.5661  max mem: 27255
Epoch: [160]  [ 200/2502]  eta: 0:13:11  lr: 0.001999  min_lr: 0.001999  loss: 3.3613 (3.3486)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7352 (0.7790)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [160]  [ 400/2502]  eta: 0:11:53  lr: 0.001997  min_lr: 0.001997  loss: 3.4242 (3.3443)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7648 (0.7702)  time: 0.3457  data: 0.0004  max mem: 27255
Epoch: [160]  [ 600/2502]  eta: 0:10:42  lr: 0.001995  min_lr: 0.001995  loss: 3.6207 (3.3550)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7868 (0.7835)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [160]  [ 800/2502]  eta: 0:09:33  lr: 0.001993  min_lr: 0.001993  loss: 3.4251 (3.3551)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8124 (0.7870)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [160]  [1000/2502]  eta: 0:08:25  lr: 0.001992  min_lr: 0.001992  loss: 3.5117 (3.3397)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8434 (0.8014)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [160]  [1200/2502]  eta: 0:07:17  lr: 0.001990  min_lr: 0.001990  loss: 3.5466 (3.3492)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8052 (0.8055)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [160]  [1400/2502]  eta: 0:06:10  lr: 0.001988  min_lr: 0.001988  loss: 3.5922 (3.3499)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7855 (0.8026)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [160]  [1600/2502]  eta: 0:05:03  lr: 0.001986  min_lr: 0.001986  loss: 3.0145 (3.3503)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7653 (0.8054)  time: 0.3372  data: 0.0005  max mem: 27255
Epoch: [160]  [1800/2502]  eta: 0:03:55  lr: 0.001984  min_lr: 0.001984  loss: 3.1236 (3.3544)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7920 (0.8050)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [160]  [2000/2502]  eta: 0:02:48  lr: 0.001983  min_lr: 0.001983  loss: 3.5038 (3.3558)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7972 (0.8098)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [160]  [2200/2502]  eta: 0:01:41  lr: 0.001981  min_lr: 0.001981  loss: 3.5346 (3.3569)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7125 (0.8040)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [160]  [2400/2502]  eta: 0:00:34  lr: 0.001979  min_lr: 0.001979  loss: 3.2962 (3.3608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8268 (0.8048)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [160]  [2501/2502]  eta: 0:00:00  lr: 0.001978  min_lr: 0.001978  loss: 3.5972 (3.3640)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7792 (0.8038)  time: 0.3006  data: 0.0009  max mem: 27255
Epoch: [160] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.001978  min_lr: 0.001978  loss: 3.5972 (3.3697)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7792 (0.8038)
Test:  [ 0/50]  eta: 0:02:17  loss: 0.6827 (0.6827)  acc1: 90.4000 (90.4000)  acc5: 99.6000 (99.6000)  time: 2.7502  data: 2.5563  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9677 (1.0200)  acc1: 83.6000 (83.7818)  acc5: 97.2000 (96.9091)  time: 0.4744  data: 0.3078  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0542 (1.0531)  acc1: 81.2000 (82.4762)  acc5: 96.8000 (96.9333)  time: 0.2392  data: 0.0752  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1919 (1.1403)  acc1: 77.6000 (80.3097)  acc5: 94.8000 (95.5742)  time: 0.2367  data: 0.0727  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4034 (1.2008)  acc1: 74.0000 (78.8098)  acc5: 92.4000 (94.8683)  time: 0.2092  data: 0.0453  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3652 (1.2175)  acc1: 75.6000 (78.2960)  acc5: 93.2000 (94.7520)  time: 0.2088  data: 0.0451  max mem: 27255
Test: Total time: 0:00:13 (0.2661 s / it)
* Acc@1 78.266 Acc@5 94.808 loss 1.217
Accuracy of the model on the 50000 test images: 78.3%
Max accuracy: 78.39%
Epoch: [161]  [   0/2502]  eta: 1:28:29  lr: 0.001978  min_lr: 0.001978  loss: 3.6337 (3.6337)  weight_decay: 0.0500 (0.0500)  time: 2.1220  data: 1.6731  max mem: 27255
Epoch: [161]  [ 200/2502]  eta: 0:13:10  lr: 0.001976  min_lr: 0.001976  loss: 3.3706 (3.3330)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8242 (0.8426)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [161]  [ 400/2502]  eta: 0:11:53  lr: 0.001974  min_lr: 0.001974  loss: 3.2895 (3.3496)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7980 (0.8150)  time: 0.3417  data: 0.0004  max mem: 27255
Epoch: [161]  [ 600/2502]  eta: 0:10:41  lr: 0.001973  min_lr: 0.001973  loss: 3.4080 (3.3358)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8024 (0.8212)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [161]  [ 800/2502]  eta: 0:09:32  lr: 0.001971  min_lr: 0.001971  loss: 3.6211 (3.3474)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7340 (0.8054)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [161]  [1000/2502]  eta: 0:08:24  lr: 0.001969  min_lr: 0.001969  loss: 3.2788 (3.3611)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8166 (0.8039)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [161]  [1200/2502]  eta: 0:07:16  lr: 0.001967  min_lr: 0.001967  loss: 3.5992 (3.3759)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8368 (0.8027)  time: 0.3324  data: 0.0004  max mem: 27255
Epoch: [161]  [1400/2502]  eta: 0:06:09  lr: 0.001966  min_lr: 0.001966  loss: 3.1724 (3.3731)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7385 (0.7966)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [161]  [1600/2502]  eta: 0:05:02  lr: 0.001964  min_lr: 0.001964  loss: 3.4517 (3.3699)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8297 (0.8012)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [161]  [1800/2502]  eta: 0:03:55  lr: 0.001962  min_lr: 0.001962  loss: 3.3693 (3.3719)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7477 (0.8006)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [161]  [2000/2502]  eta: 0:02:48  lr: 0.001960  min_lr: 0.001960  loss: 3.6503 (3.3703)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8221 (0.8038)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [161]  [2200/2502]  eta: 0:01:41  lr: 0.001958  min_lr: 0.001958  loss: 3.3868 (3.3698)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7769 (0.8088)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [161]  [2400/2502]  eta: 0:00:34  lr: 0.001957  min_lr: 0.001957  loss: 3.4816 (3.3717)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8208 (0.8088)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [161]  [2501/2502]  eta: 0:00:00  lr: 0.001956  min_lr: 0.001956  loss: 3.3993 (3.3743)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7254 (0.8057)  time: 0.3003  data: 0.0007  max mem: 27255
Epoch: [161] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.001956  min_lr: 0.001956  loss: 3.3993 (3.3673)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7254 (0.8057)
Test:  [ 0/50]  eta: 0:02:33  loss: 0.6784 (0.6784)  acc1: 92.0000 (92.0000)  acc5: 99.2000 (99.2000)  time: 3.0669  data: 2.8695  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9846 (0.9929)  acc1: 84.8000 (84.0727)  acc5: 97.2000 (96.5455)  time: 0.4536  data: 0.2867  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0019 (1.0128)  acc1: 83.2000 (82.9905)  acc5: 97.2000 (96.7238)  time: 0.1999  data: 0.0360  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1433 (1.1102)  acc1: 78.4000 (80.8387)  acc5: 94.0000 (95.4323)  time: 0.2364  data: 0.0725  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3713 (1.1813)  acc1: 74.8000 (78.9756)  acc5: 92.0000 (94.6732)  time: 0.2427  data: 0.0790  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3704 (1.1958)  acc1: 73.6000 (78.3040)  acc5: 92.8000 (94.6480)  time: 0.1960  data: 0.0325  max mem: 27255
Test: Total time: 0:00:13 (0.2718 s / it)
* Acc@1 78.478 Acc@5 94.736 loss 1.190
Accuracy of the model on the 50000 test images: 78.5%
Max accuracy: 78.48%
Epoch: [162]  [   0/2502]  eta: 1:22:15  lr: 0.001956  min_lr: 0.001956  loss: 3.1302 (3.1302)  weight_decay: 0.0500 (0.0500)  time: 1.9726  data: 1.6330  max mem: 27255
Epoch: [162]  [ 200/2502]  eta: 0:13:15  lr: 0.001954  min_lr: 0.001954  loss: 3.5433 (3.2606)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7367 (0.7964)  time: 0.3414  data: 0.0004  max mem: 27255
Epoch: [162]  [ 400/2502]  eta: 0:11:54  lr: 0.001952  min_lr: 0.001952  loss: 3.5285 (3.3396)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7156 (0.7797)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [162]  [ 600/2502]  eta: 0:10:42  lr: 0.001950  min_lr: 0.001950  loss: 3.4494 (3.3617)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8165 (0.7906)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [162]  [ 800/2502]  eta: 0:09:33  lr: 0.001948  min_lr: 0.001948  loss: 3.4761 (3.3784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8487 (0.8036)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [162]  [1000/2502]  eta: 0:08:25  lr: 0.001947  min_lr: 0.001947  loss: 3.4614 (3.3656)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7736 (0.8030)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [162]  [1200/2502]  eta: 0:07:17  lr: 0.001945  min_lr: 0.001945  loss: 3.5139 (3.3758)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8197 (0.8053)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [162]  [1400/2502]  eta: 0:06:10  lr: 0.001943  min_lr: 0.001943  loss: 3.3731 (3.3686)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7580 (0.8057)  time: 0.3407  data: 0.0005  max mem: 27255
Epoch: [162]  [1600/2502]  eta: 0:05:03  lr: 0.001941  min_lr: 0.001941  loss: 3.3825 (3.3641)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7468 (0.8056)  time: 0.3341  data: 0.0003  max mem: 27255
Epoch: [162]  [1800/2502]  eta: 0:03:55  lr: 0.001939  min_lr: 0.001939  loss: 3.5123 (3.3652)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7704 (0.8032)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [162]  [2000/2502]  eta: 0:02:48  lr: 0.001938  min_lr: 0.001938  loss: 3.5751 (3.3647)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7971 (0.8024)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [162]  [2200/2502]  eta: 0:01:41  lr: 0.001936  min_lr: 0.001936  loss: 3.5208 (3.3658)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7846 (0.8017)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [162]  [2400/2502]  eta: 0:00:34  lr: 0.001934  min_lr: 0.001934  loss: 3.4525 (3.3653)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7765 (0.8035)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [162]  [2501/2502]  eta: 0:00:00  lr: 0.001933  min_lr: 0.001933  loss: 3.5633 (3.3676)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7836 (0.8030)  time: 0.3012  data: 0.0009  max mem: 27255
Epoch: [162] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.001933  min_lr: 0.001933  loss: 3.5633 (3.3625)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7836 (0.8030)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.5566 (0.5566)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 2.8029  data: 2.6085  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8627 (0.9112)  acc1: 84.4000 (83.7818)  acc5: 97.2000 (96.9818)  time: 0.4245  data: 0.2579  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8880 (0.9186)  acc1: 82.8000 (83.1048)  acc5: 97.2000 (97.0857)  time: 0.2221  data: 0.0583  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1360 (1.0325)  acc1: 78.4000 (81.0581)  acc5: 94.4000 (95.4968)  time: 0.2531  data: 0.0893  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3362 (1.1044)  acc1: 74.8000 (79.3366)  acc5: 92.4000 (94.7122)  time: 0.2301  data: 0.0664  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3297 (1.1235)  acc1: 73.6000 (78.5680)  acc5: 92.4000 (94.5680)  time: 0.1875  data: 0.0240  max mem: 27255
Test: Total time: 0:00:13 (0.2683 s / it)
* Acc@1 78.450 Acc@5 94.682 loss 1.129
Accuracy of the model on the 50000 test images: 78.5%
Max accuracy: 78.48%
Epoch: [163]  [   0/2502]  eta: 1:24:57  lr: 0.001933  min_lr: 0.001933  loss: 3.7375 (3.7375)  weight_decay: 0.0500 (0.0500)  time: 2.0373  data: 1.4508  max mem: 27255
Epoch: [163]  [ 200/2502]  eta: 0:13:08  lr: 0.001931  min_lr: 0.001931  loss: 3.0190 (3.3329)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7281 (0.8075)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [163]  [ 400/2502]  eta: 0:11:51  lr: 0.001930  min_lr: 0.001930  loss: 3.5185 (3.3314)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8691 (0.8343)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [163]  [ 600/2502]  eta: 0:10:41  lr: 0.001928  min_lr: 0.001928  loss: 3.5681 (3.3157)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8448 (0.8436)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [163]  [ 800/2502]  eta: 0:09:33  lr: 0.001926  min_lr: 0.001926  loss: 3.2914 (3.3047)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7722 (0.8282)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [163]  [1000/2502]  eta: 0:08:24  lr: 0.001924  min_lr: 0.001924  loss: 3.2060 (3.3148)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7314 (0.8238)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [163]  [1200/2502]  eta: 0:07:17  lr: 0.001922  min_lr: 0.001922  loss: 3.5404 (3.3195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7366 (0.8242)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [163]  [1400/2502]  eta: 0:06:10  lr: 0.001921  min_lr: 0.001921  loss: 3.4263 (3.3267)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8256 (0.8248)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [163]  [1600/2502]  eta: 0:05:02  lr: 0.001919  min_lr: 0.001919  loss: 3.6443 (3.3275)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8209 (0.8218)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [163]  [1800/2502]  eta: 0:03:55  lr: 0.001917  min_lr: 0.001917  loss: 3.4021 (3.3348)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7926 (0.8193)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [163]  [2000/2502]  eta: 0:02:48  lr: 0.001915  min_lr: 0.001915  loss: 3.3940 (3.3374)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8564 (0.8193)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [163]  [2200/2502]  eta: 0:01:41  lr: 0.001913  min_lr: 0.001913  loss: 3.6706 (3.3464)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7797 (0.8185)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [163]  [2400/2502]  eta: 0:00:34  lr: 0.001912  min_lr: 0.001912  loss: 3.6038 (3.3527)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8526 (0.8222)  time: 0.3353  data: 0.0005  max mem: 27255
Epoch: [163]  [2501/2502]  eta: 0:00:00  lr: 0.001911  min_lr: 0.001911  loss: 3.6518 (3.3542)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7367 (0.8207)  time: 0.3014  data: 0.0009  max mem: 27255
Epoch: [163] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.001911  min_lr: 0.001911  loss: 3.6518 (3.3622)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7367 (0.8207)
Test:  [ 0/50]  eta: 0:02:13  loss: 0.7162 (0.7162)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 2.6740  data: 2.4767  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9216 (0.9854)  acc1: 85.6000 (84.5818)  acc5: 96.8000 (96.9818)  time: 0.4226  data: 0.2552  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9564 (0.9975)  acc1: 83.6000 (83.6571)  acc5: 96.8000 (97.0286)  time: 0.2030  data: 0.0388  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1573 (1.1117)  acc1: 79.2000 (81.3032)  acc5: 94.4000 (95.6903)  time: 0.2393  data: 0.0724  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.4080 (1.1880)  acc1: 72.4000 (79.1317)  acc5: 92.4000 (94.8000)  time: 0.2384  data: 0.0697  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3784 (1.2105)  acc1: 72.4000 (78.4640)  acc5: 92.4000 (94.6240)  time: 0.1853  data: 0.0197  max mem: 27255
Test: Total time: 0:00:13 (0.2618 s / it)
* Acc@1 78.416 Acc@5 94.688 loss 1.205
Accuracy of the model on the 50000 test images: 78.4%
Max accuracy: 78.48%
Epoch: [164]  [   0/2502]  eta: 1:30:21  lr: 0.001911  min_lr: 0.001911  loss: 2.5584 (2.5584)  weight_decay: 0.0500 (0.0500)  time: 2.1670  data: 1.8299  max mem: 27255
Epoch: [164]  [ 200/2502]  eta: 0:13:08  lr: 0.001909  min_lr: 0.001909  loss: 3.2854 (3.3529)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [164]  [ 400/2502]  eta: 0:11:52  lr: 0.001907  min_lr: 0.001907  loss: 3.3737 (3.3405)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7631 (nan)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [164]  [ 600/2502]  eta: 0:10:40  lr: 0.001905  min_lr: 0.001905  loss: 3.4841 (3.3409)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8679 (nan)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [164]  [ 800/2502]  eta: 0:09:32  lr: 0.001904  min_lr: 0.001904  loss: 3.5581 (3.3526)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8539 (nan)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [164]  [1000/2502]  eta: 0:08:24  lr: 0.001902  min_lr: 0.001902  loss: 3.3911 (3.3597)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8271 (nan)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [164]  [1200/2502]  eta: 0:07:16  lr: 0.001900  min_lr: 0.001900  loss: 3.3615 (3.3560)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8341 (nan)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [164]  [1400/2502]  eta: 0:06:09  lr: 0.001898  min_lr: 0.001898  loss: 3.4040 (3.3602)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7902 (nan)  time: 0.3331  data: 0.0005  max mem: 27255
Epoch: [164]  [1600/2502]  eta: 0:05:02  lr: 0.001896  min_lr: 0.001896  loss: 3.4749 (3.3613)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7809 (nan)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [164]  [1800/2502]  eta: 0:03:55  lr: 0.001895  min_lr: 0.001895  loss: 3.4547 (3.3581)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8272 (nan)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [164]  [2000/2502]  eta: 0:02:48  lr: 0.001893  min_lr: 0.001893  loss: 3.4368 (3.3563)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8049 (nan)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [164]  [2200/2502]  eta: 0:01:41  lr: 0.001891  min_lr: 0.001891  loss: 3.3896 (3.3567)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8269 (nan)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [164]  [2400/2502]  eta: 0:00:34  lr: 0.001889  min_lr: 0.001889  loss: 3.0804 (3.3561)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8143 (nan)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [164]  [2501/2502]  eta: 0:00:00  lr: 0.001888  min_lr: 0.001888  loss: 3.4369 (3.3556)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8163 (nan)  time: 0.2994  data: 0.0009  max mem: 27255
Epoch: [164] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.001888  min_lr: 0.001888  loss: 3.4369 (3.3569)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8163 (nan)
Test:  [ 0/50]  eta: 0:02:25  loss: 0.6804 (0.6804)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 2.9103  data: 2.7057  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 1.0063 (1.0223)  acc1: 83.6000 (83.2364)  acc5: 97.2000 (96.8000)  time: 0.4145  data: 0.2464  max mem: 27255
Test:  [20/50]  eta: 0:00:08  loss: 1.0063 (1.0255)  acc1: 81.2000 (82.6095)  acc5: 97.2000 (96.9333)  time: 0.1676  data: 0.0034  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1623 (1.1217)  acc1: 78.4000 (80.4645)  acc5: 94.8000 (95.5742)  time: 0.1989  data: 0.0315  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3836 (1.1896)  acc1: 74.0000 (78.7610)  acc5: 91.6000 (94.8195)  time: 0.2235  data: 0.0562  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3352 (1.2041)  acc1: 74.0000 (78.1120)  acc5: 92.8000 (94.6880)  time: 0.2017  data: 0.0362  max mem: 27255
Test: Total time: 0:00:12 (0.2465 s / it)
* Acc@1 78.234 Acc@5 94.712 loss 1.201
Accuracy of the model on the 50000 test images: 78.2%
Max accuracy: 78.48%
Epoch: [165]  [   0/2502]  eta: 1:35:07  lr: 0.001888  min_lr: 0.001888  loss: 3.6632 (3.6632)  weight_decay: 0.0500 (0.0500)  time: 2.2810  data: 1.5360  max mem: 27255
Epoch: [165]  [ 200/2502]  eta: 0:13:14  lr: 0.001887  min_lr: 0.001887  loss: 3.5007 (3.3162)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8131 (0.7888)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [165]  [ 400/2502]  eta: 0:11:55  lr: 0.001885  min_lr: 0.001885  loss: 3.3689 (3.3709)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8388 (0.8013)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [165]  [ 600/2502]  eta: 0:10:43  lr: 0.001883  min_lr: 0.001883  loss: 3.4976 (3.3818)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7935 (0.8008)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [165]  [ 800/2502]  eta: 0:09:34  lr: 0.001881  min_lr: 0.001881  loss: 3.5728 (3.3834)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8314 (0.8021)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [165]  [1000/2502]  eta: 0:08:26  lr: 0.001879  min_lr: 0.001879  loss: 3.4647 (3.3783)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8311 (0.8136)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [165]  [1200/2502]  eta: 0:07:18  lr: 0.001878  min_lr: 0.001878  loss: 3.2502 (3.3615)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8120 (0.8137)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [165]  [1400/2502]  eta: 0:06:10  lr: 0.001876  min_lr: 0.001876  loss: 3.4561 (3.3533)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7822 (0.8104)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [165]  [1600/2502]  eta: 0:05:03  lr: 0.001874  min_lr: 0.001874  loss: 3.5872 (3.3575)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8661 (0.8128)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [165]  [1800/2502]  eta: 0:03:55  lr: 0.001872  min_lr: 0.001872  loss: 3.4245 (3.3645)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8038 (0.8126)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [165]  [2000/2502]  eta: 0:02:48  lr: 0.001870  min_lr: 0.001870  loss: 3.3517 (3.3613)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7814 (0.8126)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [165]  [2200/2502]  eta: 0:01:41  lr: 0.001869  min_lr: 0.001869  loss: 3.2479 (3.3595)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7524 (0.8131)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [165]  [2400/2502]  eta: 0:00:34  lr: 0.001867  min_lr: 0.001867  loss: 3.4039 (3.3578)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8144 (0.8168)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [165]  [2501/2502]  eta: 0:00:00  lr: 0.001866  min_lr: 0.001866  loss: 3.5671 (3.3607)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8365 (0.8196)  time: 0.3007  data: 0.0009  max mem: 27255
Epoch: [165] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.001866  min_lr: 0.001866  loss: 3.5671 (3.3481)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8365 (0.8196)
Test:  [ 0/50]  eta: 0:02:49  loss: 0.6253 (0.6253)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.3826  data: 3.2005  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9558 (0.9670)  acc1: 84.0000 (84.2182)  acc5: 96.8000 (96.6182)  time: 0.4662  data: 0.3006  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9472 (0.9717)  acc1: 82.4000 (83.3905)  acc5: 96.8000 (96.8571)  time: 0.1695  data: 0.0056  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0891 (1.0809)  acc1: 77.6000 (81.1484)  acc5: 94.8000 (95.5355)  time: 0.1848  data: 0.0208  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3523 (1.1477)  acc1: 74.4000 (79.1902)  acc5: 92.4000 (94.9756)  time: 0.2009  data: 0.0371  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3333 (1.1627)  acc1: 73.2000 (78.5520)  acc5: 93.2000 (94.8080)  time: 0.2007  data: 0.0369  max mem: 27255
Test: Total time: 0:00:12 (0.2472 s / it)
* Acc@1 78.626 Acc@5 94.710 loss 1.166
Accuracy of the model on the 50000 test images: 78.6%
Max accuracy: 78.63%
Epoch: [166]  [   0/2502]  eta: 1:29:44  lr: 0.001866  min_lr: 0.001866  loss: 3.0896 (3.0896)  weight_decay: 0.0500 (0.0500)  time: 2.1521  data: 1.7986  max mem: 27255
Epoch: [166]  [ 200/2502]  eta: 0:13:12  lr: 0.001864  min_lr: 0.001864  loss: 3.5998 (3.3455)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7599 (0.8142)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [166]  [ 400/2502]  eta: 0:11:52  lr: 0.001862  min_lr: 0.001862  loss: 3.5312 (3.3298)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7481 (0.8018)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [166]  [ 600/2502]  eta: 0:10:41  lr: 0.001861  min_lr: 0.001861  loss: 3.1596 (3.3322)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8090 (0.8179)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [166]  [ 800/2502]  eta: 0:09:32  lr: 0.001859  min_lr: 0.001859  loss: 3.6391 (3.3278)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7574 (0.8116)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [166]  [1000/2502]  eta: 0:08:25  lr: 0.001857  min_lr: 0.001857  loss: 3.3364 (3.3290)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8054 (0.8211)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [166]  [1200/2502]  eta: 0:07:17  lr: 0.001855  min_lr: 0.001855  loss: 3.2025 (3.3344)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7842 (0.8230)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [166]  [1400/2502]  eta: 0:06:09  lr: 0.001853  min_lr: 0.001853  loss: 3.5028 (3.3331)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8180 (0.8249)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [166]  [1600/2502]  eta: 0:05:02  lr: 0.001852  min_lr: 0.001852  loss: 3.4730 (3.3329)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7919 (0.8245)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [166]  [1800/2502]  eta: 0:03:55  lr: 0.001850  min_lr: 0.001850  loss: 3.4998 (3.3377)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7877 (0.8259)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [166]  [2000/2502]  eta: 0:02:48  lr: 0.001848  min_lr: 0.001848  loss: 3.4355 (3.3367)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8170 (0.8263)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [166]  [2200/2502]  eta: 0:01:41  lr: 0.001846  min_lr: 0.001846  loss: 3.5399 (3.3433)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7614 (0.8244)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [166]  [2400/2502]  eta: 0:00:34  lr: 0.001845  min_lr: 0.001845  loss: 3.2824 (3.3440)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8401 (0.8279)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [166]  [2501/2502]  eta: 0:00:00  lr: 0.001844  min_lr: 0.001844  loss: 3.4413 (3.3459)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8033 (0.8278)  time: 0.3038  data: 0.0009  max mem: 27255
Epoch: [166] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.001844  min_lr: 0.001844  loss: 3.4413 (3.3414)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8033 (0.8278)
Test:  [ 0/50]  eta: 0:02:17  loss: 0.5310 (0.5310)  acc1: 92.8000 (92.8000)  acc5: 98.4000 (98.4000)  time: 2.7463  data: 2.5546  max mem: 27255
Test:  [10/50]  eta: 0:00:15  loss: 0.8180 (0.8658)  acc1: 82.4000 (83.7091)  acc5: 96.8000 (96.9818)  time: 0.3999  data: 0.2326  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8504 (0.8811)  acc1: 81.6000 (83.0857)  acc5: 96.8000 (97.1238)  time: 0.1834  data: 0.0190  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0302 (0.9986)  acc1: 78.4000 (80.8903)  acc5: 95.6000 (95.6774)  time: 0.2299  data: 0.0630  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3108 (1.0763)  acc1: 74.0000 (79.1220)  acc5: 92.4000 (94.9659)  time: 0.2262  data: 0.0594  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2899 (1.0960)  acc1: 73.6000 (78.5200)  acc5: 92.8000 (94.8880)  time: 0.1790  data: 0.0153  max mem: 27255
Test: Total time: 0:00:12 (0.2504 s / it)
* Acc@1 78.814 Acc@5 94.934 loss 1.093
Accuracy of the model on the 50000 test images: 78.8%
Max accuracy: 78.81%
Epoch: [167]  [   0/2502]  eta: 1:19:11  lr: 0.001844  min_lr: 0.001844  loss: 3.3388 (3.3388)  weight_decay: 0.0500 (0.0500)  time: 1.8991  data: 1.5427  max mem: 27255
Epoch: [167]  [ 200/2502]  eta: 0:13:09  lr: 0.001842  min_lr: 0.001842  loss: 3.4264 (3.2916)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8262 (0.8255)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [167]  [ 400/2502]  eta: 0:11:50  lr: 0.001840  min_lr: 0.001840  loss: 3.3746 (3.2900)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7968 (0.8264)  time: 0.3328  data: 0.0005  max mem: 27255
Epoch: [167]  [ 600/2502]  eta: 0:10:40  lr: 0.001838  min_lr: 0.001838  loss: 3.5694 (3.3122)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7702 (0.8236)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [167]  [ 800/2502]  eta: 0:09:32  lr: 0.001836  min_lr: 0.001836  loss: 3.4831 (3.3064)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7710 (0.8164)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [167]  [1000/2502]  eta: 0:08:24  lr: 0.001835  min_lr: 0.001835  loss: 3.3412 (3.3134)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7480 (0.8147)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [167]  [1200/2502]  eta: 0:07:16  lr: 0.001833  min_lr: 0.001833  loss: 3.3271 (3.3177)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7832 (0.8116)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [167]  [1400/2502]  eta: 0:06:09  lr: 0.001831  min_lr: 0.001831  loss: 3.3510 (3.3274)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8535 (0.8144)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [167]  [1600/2502]  eta: 0:05:02  lr: 0.001829  min_lr: 0.001829  loss: 3.3272 (3.3293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7886 (0.8193)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [167]  [1800/2502]  eta: 0:03:55  lr: 0.001828  min_lr: 0.001828  loss: 3.4410 (3.3209)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8329 (0.8199)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [167]  [2000/2502]  eta: 0:02:48  lr: 0.001826  min_lr: 0.001826  loss: 3.3170 (3.3244)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8362 (0.8211)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [167]  [2200/2502]  eta: 0:01:41  lr: 0.001824  min_lr: 0.001824  loss: 3.4942 (3.3271)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8497 (0.8223)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [167]  [2400/2502]  eta: 0:00:34  lr: 0.001822  min_lr: 0.001822  loss: 3.2961 (3.3240)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7354 (0.8214)  time: 0.3346  data: 0.0003  max mem: 27255
Epoch: [167]  [2501/2502]  eta: 0:00:00  lr: 0.001821  min_lr: 0.001821  loss: 3.3469 (3.3252)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8844 (0.8245)  time: 0.2999  data: 0.0007  max mem: 27255
Epoch: [167] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.001821  min_lr: 0.001821  loss: 3.3469 (3.3366)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8844 (0.8245)
Test:  [ 0/50]  eta: 0:02:17  loss: 0.6631 (0.6631)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 2.7516  data: 2.5607  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9443 (0.9730)  acc1: 85.2000 (84.3636)  acc5: 96.8000 (96.8000)  time: 0.4528  data: 0.2864  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9701 (0.9882)  acc1: 82.8000 (83.5429)  acc5: 96.8000 (97.0286)  time: 0.2160  data: 0.0521  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1395 (1.0983)  acc1: 78.8000 (80.9161)  acc5: 95.2000 (95.4194)  time: 0.1976  data: 0.0337  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3423 (1.1640)  acc1: 74.0000 (79.2000)  acc5: 92.0000 (94.8683)  time: 0.1980  data: 0.0341  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3368 (1.1883)  acc1: 74.0000 (78.5840)  acc5: 93.2000 (94.6960)  time: 0.1889  data: 0.0252  max mem: 27255
Test: Total time: 0:00:12 (0.2532 s / it)
* Acc@1 78.620 Acc@5 94.776 loss 1.188
Accuracy of the model on the 50000 test images: 78.6%
Max accuracy: 78.81%
Epoch: [168]  [   0/2502]  eta: 1:37:27  lr: 0.001821  min_lr: 0.001821  loss: 3.1324 (3.1324)  weight_decay: 0.0500 (0.0500)  time: 2.3373  data: 1.9489  max mem: 27255
Epoch: [168]  [ 200/2502]  eta: 0:13:15  lr: 0.001819  min_lr: 0.001819  loss: 3.4656 (3.2965)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7870 (0.8366)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [168]  [ 400/2502]  eta: 0:11:55  lr: 0.001818  min_lr: 0.001818  loss: 3.4328 (3.3129)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7704 (0.8225)  time: 0.3401  data: 0.0004  max mem: 27255
Epoch: [168]  [ 600/2502]  eta: 0:10:43  lr: 0.001816  min_lr: 0.001816  loss: 3.4178 (3.3340)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9039 (0.8327)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [168]  [ 800/2502]  eta: 0:09:34  lr: 0.001814  min_lr: 0.001814  loss: 3.2875 (3.3344)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7676 (0.8409)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [168]  [1000/2502]  eta: 0:08:25  lr: 0.001812  min_lr: 0.001812  loss: 3.3965 (3.3328)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7351 (0.8363)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [168]  [1200/2502]  eta: 0:07:18  lr: 0.001811  min_lr: 0.001811  loss: 3.3939 (3.3293)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8196 (0.8359)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [168]  [1400/2502]  eta: 0:06:10  lr: 0.001809  min_lr: 0.001809  loss: 2.8851 (3.3350)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7594 (0.8375)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [168]  [1600/2502]  eta: 0:05:02  lr: 0.001807  min_lr: 0.001807  loss: 3.5656 (3.3408)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8126 (0.8366)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [168]  [1800/2502]  eta: 0:03:55  lr: 0.001805  min_lr: 0.001805  loss: 3.5123 (3.3454)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9302 (0.8389)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [168]  [2000/2502]  eta: 0:02:48  lr: 0.001803  min_lr: 0.001803  loss: 3.5457 (3.3443)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7831 (0.8388)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [168]  [2200/2502]  eta: 0:01:41  lr: 0.001802  min_lr: 0.001802  loss: 3.2535 (3.3432)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7513 (0.8375)  time: 0.3341  data: 0.0003  max mem: 27255
Epoch: [168]  [2400/2502]  eta: 0:00:34  lr: 0.001800  min_lr: 0.001800  loss: 3.0226 (3.3363)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8018 (0.8388)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [168]  [2501/2502]  eta: 0:00:00  lr: 0.001799  min_lr: 0.001799  loss: 3.2152 (3.3342)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8697 (0.8412)  time: 0.3000  data: 0.0007  max mem: 27255
Epoch: [168] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.001799  min_lr: 0.001799  loss: 3.2152 (3.3280)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8697 (0.8412)
Test:  [ 0/50]  eta: 0:02:24  loss: 0.5590 (0.5590)  acc1: 92.0000 (92.0000)  acc5: 98.8000 (98.8000)  time: 2.8825  data: 2.6809  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8572 (0.8923)  acc1: 85.6000 (84.1455)  acc5: 96.8000 (96.5818)  time: 0.4125  data: 0.2441  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9019 (0.9185)  acc1: 82.4000 (83.1810)  acc5: 96.8000 (96.8381)  time: 0.2275  data: 0.0627  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1032 (1.0219)  acc1: 78.0000 (80.9032)  acc5: 96.0000 (95.4839)  time: 0.2545  data: 0.0902  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2904 (1.0866)  acc1: 74.8000 (79.1512)  acc5: 92.0000 (94.8000)  time: 0.2053  data: 0.0415  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2410 (1.1007)  acc1: 74.8000 (78.5440)  acc5: 92.8000 (94.7440)  time: 0.1775  data: 0.0139  max mem: 27255
Test: Total time: 0:00:13 (0.2619 s / it)
* Acc@1 78.600 Acc@5 94.846 loss 1.100
Accuracy of the model on the 50000 test images: 78.6%
Max accuracy: 78.81%
Epoch: [169]  [   0/2502]  eta: 1:30:10  lr: 0.001799  min_lr: 0.001799  loss: 3.6164 (3.6164)  weight_decay: 0.0500 (0.0500)  time: 2.1624  data: 1.6904  max mem: 27255
Epoch: [169]  [ 200/2502]  eta: 0:13:12  lr: 0.001797  min_lr: 0.001797  loss: 3.4431 (3.3388)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7848 (0.8715)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [169]  [ 400/2502]  eta: 0:11:53  lr: 0.001795  min_lr: 0.001795  loss: 3.2406 (3.3278)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7645 (inf)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [169]  [ 600/2502]  eta: 0:10:44  lr: 0.001794  min_lr: 0.001794  loss: 3.4216 (3.3263)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7900 (inf)  time: 0.3391  data: 0.0005  max mem: 27255
Epoch: [169]  [ 800/2502]  eta: 0:09:34  lr: 0.001792  min_lr: 0.001792  loss: 3.3992 (3.3338)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8639 (inf)  time: 0.3361  data: 0.0005  max mem: 27255
Epoch: [169]  [1000/2502]  eta: 0:08:26  lr: 0.001790  min_lr: 0.001790  loss: 3.5072 (3.3375)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7726 (inf)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [169]  [1200/2502]  eta: 0:07:18  lr: 0.001788  min_lr: 0.001788  loss: 3.3328 (3.3299)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8006 (inf)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [169]  [1400/2502]  eta: 0:06:10  lr: 0.001786  min_lr: 0.001786  loss: 3.1076 (3.3224)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7790 (inf)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [169]  [1600/2502]  eta: 0:05:03  lr: 0.001785  min_lr: 0.001785  loss: 3.5417 (3.3271)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8117 (inf)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [169]  [1800/2502]  eta: 0:03:55  lr: 0.001783  min_lr: 0.001783  loss: 3.0609 (3.3313)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7929 (inf)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [169]  [2000/2502]  eta: 0:02:48  lr: 0.001781  min_lr: 0.001781  loss: 3.4211 (3.3376)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7827 (inf)  time: 0.3330  data: 0.0003  max mem: 27255
Epoch: [169]  [2200/2502]  eta: 0:01:41  lr: 0.001779  min_lr: 0.001779  loss: 3.6347 (3.3342)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8801 (inf)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [169]  [2400/2502]  eta: 0:00:34  lr: 0.001778  min_lr: 0.001778  loss: 3.5376 (3.3399)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8445 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [169]  [2501/2502]  eta: 0:00:00  lr: 0.001777  min_lr: 0.001777  loss: 3.4928 (3.3424)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9363 (inf)  time: 0.3006  data: 0.0007  max mem: 27255
Epoch: [169] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.001777  min_lr: 0.001777  loss: 3.4928 (3.3356)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9363 (inf)
Test:  [ 0/50]  eta: 0:02:24  loss: 0.5666 (0.5666)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 2.8906  data: 2.6918  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.8490 (0.8757)  acc1: 85.2000 (83.9636)  acc5: 98.0000 (97.2727)  time: 0.4614  data: 0.2946  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8879 (0.9033)  acc1: 81.6000 (83.0286)  acc5: 97.2000 (97.1429)  time: 0.1995  data: 0.0358  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0117 (1.0086)  acc1: 79.2000 (80.7742)  acc5: 95.6000 (95.7419)  time: 0.1723  data: 0.0087  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2962 (1.0767)  acc1: 75.6000 (78.9854)  acc5: 92.0000 (94.9463)  time: 0.1901  data: 0.0265  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2673 (1.0924)  acc1: 73.6000 (78.5040)  acc5: 92.4000 (94.8480)  time: 0.1899  data: 0.0263  max mem: 27255
Test: Total time: 0:00:12 (0.2449 s / it)
* Acc@1 78.672 Acc@5 94.828 loss 1.095
Accuracy of the model on the 50000 test images: 78.7%
Max accuracy: 78.81%
Epoch: [170]  [   0/2502]  eta: 1:27:20  lr: 0.001777  min_lr: 0.001777  loss: 3.7758 (3.7758)  weight_decay: 0.0500 (0.0500)  time: 2.0946  data: 1.7482  max mem: 27255
Epoch: [170]  [ 200/2502]  eta: 0:13:11  lr: 0.001775  min_lr: 0.001775  loss: 3.3212 (3.2841)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7674 (0.8234)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [170]  [ 400/2502]  eta: 0:11:52  lr: 0.001773  min_lr: 0.001773  loss: 3.3094 (3.3195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8196 (0.8459)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [170]  [ 600/2502]  eta: 0:10:41  lr: 0.001771  min_lr: 0.001771  loss: 3.3614 (3.3207)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7772 (0.8313)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [170]  [ 800/2502]  eta: 0:09:32  lr: 0.001769  min_lr: 0.001769  loss: 3.4397 (3.3159)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8802 (0.8480)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [170]  [1000/2502]  eta: 0:08:24  lr: 0.001768  min_lr: 0.001768  loss: 3.5521 (3.3171)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7895 (0.8425)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [170]  [1200/2502]  eta: 0:07:16  lr: 0.001766  min_lr: 0.001766  loss: 3.4630 (3.3140)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7780 (0.8436)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [170]  [1400/2502]  eta: 0:06:09  lr: 0.001764  min_lr: 0.001764  loss: 3.5606 (3.3242)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8684 (0.8471)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [170]  [1600/2502]  eta: 0:05:02  lr: 0.001762  min_lr: 0.001762  loss: 3.0851 (3.3233)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8148 (0.8495)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [170]  [1800/2502]  eta: 0:03:55  lr: 0.001761  min_lr: 0.001761  loss: 3.3914 (3.3190)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7730 (0.8449)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [170]  [2000/2502]  eta: 0:02:48  lr: 0.001759  min_lr: 0.001759  loss: 3.1760 (3.3224)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8547 (0.8468)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [170]  [2200/2502]  eta: 0:01:41  lr: 0.001757  min_lr: 0.001757  loss: 3.4977 (3.3243)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7817 (0.8469)  time: 0.3415  data: 0.0004  max mem: 27255
Epoch: [170]  [2400/2502]  eta: 0:00:34  lr: 0.001755  min_lr: 0.001755  loss: 3.5156 (3.3275)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8276 (0.8441)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [170]  [2501/2502]  eta: 0:00:00  lr: 0.001754  min_lr: 0.001754  loss: 3.3905 (3.3256)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8200 (0.8437)  time: 0.3000  data: 0.0007  max mem: 27255
Epoch: [170] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.001754  min_lr: 0.001754  loss: 3.3905 (3.3244)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8200 (0.8437)
Test:  [ 0/50]  eta: 0:02:22  loss: 0.5548 (0.5548)  acc1: 92.0000 (92.0000)  acc5: 99.2000 (99.2000)  time: 2.8556  data: 2.6529  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8584 (0.8923)  acc1: 84.8000 (84.3636)  acc5: 97.2000 (96.8727)  time: 0.4492  data: 0.2817  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9338 (0.9126)  acc1: 82.0000 (83.6191)  acc5: 96.8000 (96.9524)  time: 0.2374  data: 0.0732  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0385 (1.0148)  acc1: 78.8000 (81.2645)  acc5: 94.8000 (95.6258)  time: 0.2387  data: 0.0744  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2512 (1.0712)  acc1: 74.4000 (79.6683)  acc5: 92.8000 (95.0732)  time: 0.2024  data: 0.0384  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2512 (1.0931)  acc1: 74.4000 (78.9120)  acc5: 93.6000 (94.8800)  time: 0.2019  data: 0.0382  max mem: 27255
Test: Total time: 0:00:13 (0.2646 s / it)
* Acc@1 79.088 Acc@5 94.902 loss 1.087
Accuracy of the model on the 50000 test images: 79.1%
Max accuracy: 79.09%
Epoch: [171]  [   0/2502]  eta: 1:18:35  lr: 0.001754  min_lr: 0.001754  loss: 3.8764 (3.8764)  weight_decay: 0.0500 (0.0500)  time: 1.8848  data: 1.5369  max mem: 27255
Epoch: [171]  [ 200/2502]  eta: 0:13:08  lr: 0.001753  min_lr: 0.001753  loss: 3.1188 (3.2615)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8538 (0.8544)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [171]  [ 400/2502]  eta: 0:11:51  lr: 0.001751  min_lr: 0.001751  loss: 3.3747 (3.3106)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8776 (0.8708)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [171]  [ 600/2502]  eta: 0:10:41  lr: 0.001749  min_lr: 0.001749  loss: 3.4121 (3.3173)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8247 (0.8687)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [171]  [ 800/2502]  eta: 0:09:33  lr: 0.001747  min_lr: 0.001747  loss: 3.5383 (3.3067)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8456 (0.8638)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [171]  [1000/2502]  eta: 0:08:25  lr: 0.001745  min_lr: 0.001745  loss: 3.4673 (3.3110)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9161 (0.8802)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [171]  [1200/2502]  eta: 0:07:17  lr: 0.001744  min_lr: 0.001744  loss: 3.4823 (3.3175)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8469 (0.8762)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [171]  [1400/2502]  eta: 0:06:10  lr: 0.001742  min_lr: 0.001742  loss: 3.4656 (3.3156)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8038 (0.8712)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [171]  [1600/2502]  eta: 0:05:03  lr: 0.001740  min_lr: 0.001740  loss: 3.3156 (3.3163)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8258 (0.8671)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [171]  [1800/2502]  eta: 0:03:55  lr: 0.001738  min_lr: 0.001738  loss: 3.1567 (3.3118)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8063 (0.8642)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [171]  [2000/2502]  eta: 0:02:48  lr: 0.001737  min_lr: 0.001737  loss: 3.5182 (3.3096)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8906 (0.8645)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [171]  [2200/2502]  eta: 0:01:41  lr: 0.001735  min_lr: 0.001735  loss: 3.4865 (3.3086)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8695 (0.8627)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [171]  [2400/2502]  eta: 0:00:34  lr: 0.001733  min_lr: 0.001733  loss: 3.6253 (3.3189)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8615 (0.8638)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [171]  [2501/2502]  eta: 0:00:00  lr: 0.001732  min_lr: 0.001732  loss: 3.0618 (3.3171)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9159 (0.8657)  time: 0.3003  data: 0.0009  max mem: 27255
Epoch: [171] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.001732  min_lr: 0.001732  loss: 3.0618 (3.3261)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9159 (0.8657)
Test:  [ 0/50]  eta: 0:02:24  loss: 0.5780 (0.5780)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 2.8955  data: 2.6933  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8495 (0.8787)  acc1: 84.8000 (84.5091)  acc5: 96.8000 (97.0909)  time: 0.4298  data: 0.2623  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9174 (0.9089)  acc1: 82.0000 (83.5048)  acc5: 96.8000 (97.1619)  time: 0.2150  data: 0.0482  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0446 (1.0120)  acc1: 78.0000 (81.2387)  acc5: 94.8000 (95.7161)  time: 0.2574  data: 0.0898  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2606 (1.0679)  acc1: 74.4000 (79.5317)  acc5: 92.4000 (95.0537)  time: 0.2433  data: 0.0787  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2249 (1.0849)  acc1: 74.4000 (79.0080)  acc5: 92.8000 (94.9120)  time: 0.1913  data: 0.0275  max mem: 27255
Test: Total time: 0:00:13 (0.2728 s / it)
* Acc@1 79.064 Acc@5 94.922 loss 1.081
Accuracy of the model on the 50000 test images: 79.1%
Max accuracy: 79.09%
Epoch: [172]  [   0/2502]  eta: 1:14:41  lr: 0.001732  min_lr: 0.001732  loss: 3.5100 (3.5100)  weight_decay: 0.0500 (0.0500)  time: 1.7913  data: 1.3601  max mem: 27255
Epoch: [172]  [ 200/2502]  eta: 0:13:06  lr: 0.001730  min_lr: 0.001730  loss: 3.4528 (3.3132)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8465 (0.8151)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [172]  [ 400/2502]  eta: 0:11:52  lr: 0.001729  min_lr: 0.001729  loss: 3.4806 (3.3192)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7677 (0.8361)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [172]  [ 600/2502]  eta: 0:10:41  lr: 0.001727  min_lr: 0.001727  loss: 3.4774 (3.3127)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8219 (0.8472)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [172]  [ 800/2502]  eta: 0:09:33  lr: 0.001725  min_lr: 0.001725  loss: 3.3454 (3.3195)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8412 (0.8533)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [172]  [1000/2502]  eta: 0:08:25  lr: 0.001723  min_lr: 0.001723  loss: 3.4521 (3.3213)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8539 (0.8604)  time: 0.3408  data: 0.0005  max mem: 27255
Epoch: [172]  [1200/2502]  eta: 0:07:17  lr: 0.001721  min_lr: 0.001721  loss: 3.4881 (3.3255)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8579 (0.8611)  time: 0.3421  data: 0.0004  max mem: 27255
Epoch: [172]  [1400/2502]  eta: 0:06:10  lr: 0.001720  min_lr: 0.001720  loss: 3.4772 (3.3214)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8278 (0.8618)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [172]  [1600/2502]  eta: 0:05:02  lr: 0.001718  min_lr: 0.001718  loss: 3.3436 (3.3234)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8349 (0.8597)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [172]  [1800/2502]  eta: 0:03:55  lr: 0.001716  min_lr: 0.001716  loss: 3.1841 (3.3182)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9388 (0.8649)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [172]  [2000/2502]  eta: 0:02:48  lr: 0.001714  min_lr: 0.001714  loss: 3.2946 (3.3208)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7908 (0.8571)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [172]  [2200/2502]  eta: 0:01:41  lr: 0.001713  min_lr: 0.001713  loss: 3.1410 (3.3229)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8929 (0.8610)  time: 0.3402  data: 0.0004  max mem: 27255
Epoch: [172]  [2400/2502]  eta: 0:00:34  lr: 0.001711  min_lr: 0.001711  loss: 3.6797 (3.3220)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8046 (0.8581)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [172]  [2501/2502]  eta: 0:00:00  lr: 0.001710  min_lr: 0.001710  loss: 3.4343 (3.3203)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8479 (0.8579)  time: 0.3008  data: 0.0009  max mem: 27255
Epoch: [172] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.001710  min_lr: 0.001710  loss: 3.4343 (3.3229)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8479 (0.8579)
Test:  [ 0/50]  eta: 0:02:10  loss: 0.6214 (0.6214)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 2.6190  data: 2.4143  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.8516 (0.8874)  acc1: 83.2000 (84.1818)  acc5: 97.2000 (96.8000)  time: 0.4840  data: 0.3165  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8820 (0.9141)  acc1: 82.4000 (83.0857)  acc5: 97.2000 (97.1238)  time: 0.2314  data: 0.0677  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0658 (1.0081)  acc1: 79.2000 (81.1484)  acc5: 95.2000 (95.7936)  time: 0.1852  data: 0.0214  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2379 (1.0755)  acc1: 74.4000 (79.5805)  acc5: 92.4000 (95.0146)  time: 0.1933  data: 0.0296  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2748 (1.0982)  acc1: 73.6000 (78.8640)  acc5: 92.8000 (94.8880)  time: 0.1861  data: 0.0226  max mem: 27255
Test: Total time: 0:00:12 (0.2536 s / it)
* Acc@1 78.868 Acc@5 94.862 loss 1.094
Accuracy of the model on the 50000 test images: 78.9%
Max accuracy: 79.09%
Epoch: [173]  [   0/2502]  eta: 1:24:58  lr: 0.001710  min_lr: 0.001710  loss: 3.8345 (3.8345)  weight_decay: 0.0500 (0.0500)  time: 2.0379  data: 1.6909  max mem: 27255
Epoch: [173]  [ 200/2502]  eta: 0:13:10  lr: 0.001708  min_lr: 0.001708  loss: 3.4489 (3.3083)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8407 (0.8854)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [173]  [ 400/2502]  eta: 0:11:52  lr: 0.001706  min_lr: 0.001706  loss: 3.4159 (3.3017)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8981 (0.8960)  time: 0.3342  data: 0.0003  max mem: 27255
Epoch: [173]  [ 600/2502]  eta: 0:10:41  lr: 0.001705  min_lr: 0.001705  loss: 3.4234 (3.3133)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8573 (0.8861)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [173]  [ 800/2502]  eta: 0:09:32  lr: 0.001703  min_lr: 0.001703  loss: 3.3590 (3.2985)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8268 (0.8767)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [173]  [1000/2502]  eta: 0:08:24  lr: 0.001701  min_lr: 0.001701  loss: 3.3842 (3.2940)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8347 (0.8734)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [173]  [1200/2502]  eta: 0:07:16  lr: 0.001699  min_lr: 0.001699  loss: 3.1947 (3.2914)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8409 (0.8779)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [173]  [1400/2502]  eta: 0:06:09  lr: 0.001697  min_lr: 0.001697  loss: 3.2984 (3.2958)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8271 (0.8781)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [173]  [1600/2502]  eta: 0:05:02  lr: 0.001696  min_lr: 0.001696  loss: 3.3791 (3.3002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8550 (inf)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [173]  [1800/2502]  eta: 0:03:55  lr: 0.001694  min_lr: 0.001694  loss: 3.5414 (3.3068)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8591 (inf)  time: 0.3365  data: 0.0005  max mem: 27255
Epoch: [173]  [2000/2502]  eta: 0:02:48  lr: 0.001692  min_lr: 0.001692  loss: 3.5687 (3.3142)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7969 (inf)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [173]  [2200/2502]  eta: 0:01:41  lr: 0.001690  min_lr: 0.001690  loss: 3.5074 (3.3122)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8128 (inf)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [173]  [2400/2502]  eta: 0:00:34  lr: 0.001689  min_lr: 0.001689  loss: 3.3689 (3.3114)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8135 (inf)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [173]  [2501/2502]  eta: 0:00:00  lr: 0.001688  min_lr: 0.001688  loss: 3.5241 (3.3108)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8424 (inf)  time: 0.3047  data: 0.0009  max mem: 27255
Epoch: [173] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.001688  min_lr: 0.001688  loss: 3.5241 (3.3137)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8424 (inf)
Test:  [ 0/50]  eta: 0:02:16  loss: 0.5662 (0.5662)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 2.7360  data: 2.5322  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.8463 (0.9109)  acc1: 85.2000 (84.4000)  acc5: 96.8000 (96.9091)  time: 0.4868  data: 0.3190  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.9216 (0.9236)  acc1: 83.2000 (83.6571)  acc5: 97.2000 (97.1238)  time: 0.2656  data: 0.1017  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0991 (1.0322)  acc1: 78.4000 (81.0710)  acc5: 94.8000 (95.8065)  time: 0.2448  data: 0.0795  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3089 (1.1028)  acc1: 76.0000 (79.4439)  acc5: 92.4000 (95.0244)  time: 0.1919  data: 0.0267  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2893 (1.1262)  acc1: 76.0000 (78.8560)  acc5: 92.8000 (94.8640)  time: 0.1741  data: 0.0107  max mem: 27255
Test: Total time: 0:00:13 (0.2691 s / it)
* Acc@1 78.886 Acc@5 94.862 loss 1.124
Accuracy of the model on the 50000 test images: 78.9%
Max accuracy: 79.09%
Epoch: [174]  [   0/2502]  eta: 1:27:33  lr: 0.001688  min_lr: 0.001688  loss: 3.1656 (3.1656)  weight_decay: 0.0500 (0.0500)  time: 2.0997  data: 1.5495  max mem: 27255
Epoch: [174]  [ 200/2502]  eta: 0:13:09  lr: 0.001686  min_lr: 0.001686  loss: 3.1816 (3.3245)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8533 (0.8533)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [174]  [ 400/2502]  eta: 0:11:51  lr: 0.001684  min_lr: 0.001684  loss: 3.4820 (3.3220)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8420 (0.8603)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [174]  [ 600/2502]  eta: 0:10:40  lr: 0.001682  min_lr: 0.001682  loss: 3.3631 (3.3225)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8024 (0.8751)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [174]  [ 800/2502]  eta: 0:09:32  lr: 0.001681  min_lr: 0.001681  loss: 3.2906 (3.3212)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8587 (0.8635)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [174]  [1000/2502]  eta: 0:08:24  lr: 0.001679  min_lr: 0.001679  loss: 3.3322 (3.3172)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8638 (0.8646)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [174]  [1200/2502]  eta: 0:07:17  lr: 0.001677  min_lr: 0.001677  loss: 3.4065 (3.3171)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8289 (0.8635)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [174]  [1400/2502]  eta: 0:06:09  lr: 0.001675  min_lr: 0.001675  loss: 3.2096 (3.3145)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8264 (0.8590)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [174]  [1600/2502]  eta: 0:05:02  lr: 0.001674  min_lr: 0.001674  loss: 3.4378 (3.3158)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8250 (0.8591)  time: 0.3362  data: 0.0004  max mem: 27255
Epoch: [174]  [1800/2502]  eta: 0:03:55  lr: 0.001672  min_lr: 0.001672  loss: 3.4508 (3.3135)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7919 (0.8601)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [174]  [2000/2502]  eta: 0:02:48  lr: 0.001670  min_lr: 0.001670  loss: 3.2080 (3.3090)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8007 (0.8573)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [174]  [2200/2502]  eta: 0:01:41  lr: 0.001668  min_lr: 0.001668  loss: 3.2047 (3.3039)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9376 (0.8618)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [174]  [2400/2502]  eta: 0:00:34  lr: 0.001666  min_lr: 0.001666  loss: 3.3366 (3.3055)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8547 (0.8614)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [174]  [2501/2502]  eta: 0:00:00  lr: 0.001666  min_lr: 0.001666  loss: 3.5439 (3.3056)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8325 (0.8607)  time: 0.3036  data: 0.0009  max mem: 27255
Epoch: [174] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.001666  min_lr: 0.001666  loss: 3.5439 (3.3056)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8325 (0.8607)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.5973 (0.5973)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.1389  data: 2.9400  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.8534 (0.8639)  acc1: 84.0000 (84.2182)  acc5: 97.2000 (97.2000)  time: 0.4582  data: 0.2911  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8683 (0.8857)  acc1: 82.4000 (83.0857)  acc5: 97.2000 (97.0667)  time: 0.2029  data: 0.0389  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0133 (0.9965)  acc1: 79.2000 (81.0581)  acc5: 95.6000 (95.6516)  time: 0.2317  data: 0.0664  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2556 (1.0620)  acc1: 75.2000 (79.3366)  acc5: 92.8000 (95.1415)  time: 0.2231  data: 0.0578  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2001 (1.0864)  acc1: 75.2000 (78.8080)  acc5: 93.6000 (94.9920)  time: 0.2200  data: 0.0563  max mem: 27255
Test: Total time: 0:00:13 (0.2646 s / it)
* Acc@1 79.090 Acc@5 94.984 loss 1.083
Accuracy of the model on the 50000 test images: 79.1%
Max accuracy: 79.09%
Epoch: [175]  [   0/2502]  eta: 1:20:27  lr: 0.001666  min_lr: 0.001666  loss: 2.7932 (2.7932)  weight_decay: 0.0500 (0.0500)  time: 1.9293  data: 1.5840  max mem: 27255
Epoch: [175]  [ 200/2502]  eta: 0:13:08  lr: 0.001664  min_lr: 0.001664  loss: 3.2323 (3.3083)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8121 (0.8530)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [175]  [ 400/2502]  eta: 0:11:52  lr: 0.001662  min_lr: 0.001662  loss: 3.4073 (3.3053)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8173 (0.8627)  time: 0.3344  data: 0.0003  max mem: 27255
Epoch: [175]  [ 600/2502]  eta: 0:10:42  lr: 0.001660  min_lr: 0.001660  loss: 3.5764 (3.2842)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8848 (0.8846)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [175]  [ 800/2502]  eta: 0:09:33  lr: 0.001658  min_lr: 0.001658  loss: 3.4266 (3.2976)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8663 (0.8756)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [175]  [1000/2502]  eta: 0:08:25  lr: 0.001657  min_lr: 0.001657  loss: 3.3058 (3.3033)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7964 (0.8714)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [175]  [1200/2502]  eta: 0:07:17  lr: 0.001655  min_lr: 0.001655  loss: 3.4169 (3.3074)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8331 (0.8693)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [175]  [1400/2502]  eta: 0:06:09  lr: 0.001653  min_lr: 0.001653  loss: 3.4940 (3.3038)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8845 (0.8705)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [175]  [1600/2502]  eta: 0:05:02  lr: 0.001651  min_lr: 0.001651  loss: 3.0570 (3.3062)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9128 (0.8748)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [175]  [1800/2502]  eta: 0:03:55  lr: 0.001650  min_lr: 0.001650  loss: 3.3256 (3.3098)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8746 (0.8763)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [175]  [2000/2502]  eta: 0:02:48  lr: 0.001648  min_lr: 0.001648  loss: 3.2542 (3.3056)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8976 (0.8747)  time: 0.3365  data: 0.0005  max mem: 27255
Epoch: [175]  [2200/2502]  eta: 0:01:41  lr: 0.001646  min_lr: 0.001646  loss: 3.4610 (3.3074)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7930 (0.8716)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [175]  [2400/2502]  eta: 0:00:34  lr: 0.001644  min_lr: 0.001644  loss: 3.2730 (3.3041)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7822 (0.8684)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [175]  [2501/2502]  eta: 0:00:00  lr: 0.001644  min_lr: 0.001644  loss: 3.5156 (3.3091)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8116 (0.8680)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [175] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.001644  min_lr: 0.001644  loss: 3.5156 (3.3077)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8116 (0.8680)
Test:  [ 0/50]  eta: 0:02:39  loss: 0.6990 (0.6990)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 3.1967  data: 3.0008  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.9289 (0.9902)  acc1: 84.0000 (84.6909)  acc5: 96.8000 (96.8727)  time: 0.4808  data: 0.3139  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9937 (1.0223)  acc1: 82.4000 (83.6381)  acc5: 96.8000 (97.1048)  time: 0.2101  data: 0.0462  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.2076 (1.1257)  acc1: 78.8000 (81.3161)  acc5: 95.6000 (95.7161)  time: 0.1878  data: 0.0238  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3765 (1.1918)  acc1: 74.4000 (79.3951)  acc5: 92.8000 (95.0244)  time: 0.1923  data: 0.0284  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3568 (1.2099)  acc1: 75.2000 (78.8080)  acc5: 93.6000 (94.8400)  time: 0.2008  data: 0.0371  max mem: 27255
Test: Total time: 0:00:12 (0.2599 s / it)
* Acc@1 78.986 Acc@5 94.800 loss 1.212
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.09%
Epoch: [176]  [   0/2502]  eta: 1:29:08  lr: 0.001643  min_lr: 0.001643  loss: 4.0846 (4.0846)  weight_decay: 0.0500 (0.0500)  time: 2.1375  data: 1.7566  max mem: 27255
Epoch: [176]  [ 200/2502]  eta: 0:13:09  lr: 0.001642  min_lr: 0.001642  loss: 3.4931 (3.3117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8577 (0.8762)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [176]  [ 400/2502]  eta: 0:11:50  lr: 0.001640  min_lr: 0.001640  loss: 3.2856 (3.3025)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8370 (0.8643)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [176]  [ 600/2502]  eta: 0:10:39  lr: 0.001638  min_lr: 0.001638  loss: 3.4881 (3.2850)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9443 (0.8772)  time: 0.3331  data: 0.0005  max mem: 27255
Epoch: [176]  [ 800/2502]  eta: 0:09:31  lr: 0.001636  min_lr: 0.001636  loss: 3.3622 (3.2957)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7508 (0.8656)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [176]  [1000/2502]  eta: 0:08:24  lr: 0.001635  min_lr: 0.001635  loss: 3.3827 (3.3009)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9139 (0.8760)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [176]  [1200/2502]  eta: 0:07:17  lr: 0.001633  min_lr: 0.001633  loss: 3.6150 (3.3030)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8729 (0.8783)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [176]  [1400/2502]  eta: 0:06:09  lr: 0.001631  min_lr: 0.001631  loss: 3.3937 (3.3062)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9030 (0.8856)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [176]  [1600/2502]  eta: 0:05:02  lr: 0.001629  min_lr: 0.001629  loss: 3.0858 (3.3073)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8770 (0.8785)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [176]  [1800/2502]  eta: 0:03:55  lr: 0.001628  min_lr: 0.001628  loss: 3.4908 (3.3109)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8041 (0.8757)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [176]  [2000/2502]  eta: 0:02:48  lr: 0.001626  min_lr: 0.001626  loss: 3.1453 (3.3057)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8757 (0.8754)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [176]  [2200/2502]  eta: 0:01:41  lr: 0.001624  min_lr: 0.001624  loss: 2.8205 (3.3064)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9164 (0.8748)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [176]  [2400/2502]  eta: 0:00:34  lr: 0.001622  min_lr: 0.001622  loss: 3.3336 (3.3058)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8556 (0.8737)  time: 0.3340  data: 0.0003  max mem: 27255
Epoch: [176]  [2501/2502]  eta: 0:00:00  lr: 0.001621  min_lr: 0.001621  loss: 3.2040 (3.3057)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8352 (0.8734)  time: 0.2997  data: 0.0008  max mem: 27255
Epoch: [176] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.001621  min_lr: 0.001621  loss: 3.2040 (3.2958)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8352 (0.8734)
Test:  [ 0/50]  eta: 0:02:22  loss: 0.5413 (0.5413)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.8443  data: 2.6488  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9337 (0.9156)  acc1: 85.2000 (84.4364)  acc5: 96.8000 (96.7273)  time: 0.4224  data: 0.2552  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9640 (0.9497)  acc1: 81.2000 (82.9524)  acc5: 96.8000 (96.9143)  time: 0.1844  data: 0.0202  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0556 (1.0468)  acc1: 78.8000 (81.1097)  acc5: 96.4000 (95.6387)  time: 0.1831  data: 0.0192  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3166 (1.1130)  acc1: 74.4000 (79.1415)  acc5: 92.4000 (95.0634)  time: 0.1884  data: 0.0246  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3166 (1.1251)  acc1: 74.4000 (78.7520)  acc5: 93.2000 (95.0320)  time: 0.1836  data: 0.0188  max mem: 27255
Test: Total time: 0:00:11 (0.2381 s / it)
* Acc@1 78.996 Acc@5 95.026 loss 1.124
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.09%
Epoch: [177]  [   0/2502]  eta: 1:30:28  lr: 0.001621  min_lr: 0.001621  loss: 3.4527 (3.4527)  weight_decay: 0.0500 (0.0500)  time: 2.1697  data: 1.3872  max mem: 27255
Epoch: [177]  [ 200/2502]  eta: 0:13:10  lr: 0.001620  min_lr: 0.001620  loss: 3.5469 (3.2565)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8567 (0.9018)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [177]  [ 400/2502]  eta: 0:11:52  lr: 0.001618  min_lr: 0.001618  loss: 3.4789 (3.2751)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8143 (0.8890)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [177]  [ 600/2502]  eta: 0:10:41  lr: 0.001616  min_lr: 0.001616  loss: 3.5313 (3.2813)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8913 (0.8930)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [177]  [ 800/2502]  eta: 0:09:32  lr: 0.001614  min_lr: 0.001614  loss: 3.3152 (3.2822)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7507 (0.8832)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [177]  [1000/2502]  eta: 0:08:25  lr: 0.001613  min_lr: 0.001613  loss: 3.4779 (3.2932)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9048 (0.8826)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [177]  [1200/2502]  eta: 0:07:17  lr: 0.001611  min_lr: 0.001611  loss: 3.5083 (3.2977)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8728 (0.8828)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [177]  [1400/2502]  eta: 0:06:10  lr: 0.001609  min_lr: 0.001609  loss: 3.0106 (3.2933)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9132 (0.8858)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [177]  [1600/2502]  eta: 0:05:02  lr: 0.001607  min_lr: 0.001607  loss: 3.4032 (3.2957)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8458 (0.8875)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [177]  [1800/2502]  eta: 0:03:55  lr: 0.001606  min_lr: 0.001606  loss: 3.5071 (3.3014)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8617 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [177]  [2000/2502]  eta: 0:02:48  lr: 0.001604  min_lr: 0.001604  loss: 3.3768 (3.3004)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8033 (inf)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [177]  [2200/2502]  eta: 0:01:41  lr: 0.001602  min_lr: 0.001602  loss: 3.5179 (3.3039)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8688 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [177]  [2400/2502]  eta: 0:00:34  lr: 0.001600  min_lr: 0.001600  loss: 3.2374 (3.3026)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8562 (inf)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [177]  [2501/2502]  eta: 0:00:00  lr: 0.001599  min_lr: 0.001599  loss: 3.6128 (3.3083)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8022 (inf)  time: 0.3011  data: 0.0008  max mem: 27255
Epoch: [177] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.001599  min_lr: 0.001599  loss: 3.6128 (3.2918)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8022 (inf)
Test:  [ 0/50]  eta: 0:02:26  loss: 0.6511 (0.6511)  acc1: 91.2000 (91.2000)  acc5: 98.8000 (98.8000)  time: 2.9332  data: 2.7386  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9166 (0.9567)  acc1: 85.6000 (84.2909)  acc5: 97.6000 (96.9455)  time: 0.4601  data: 0.2930  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9763 (0.9811)  acc1: 80.8000 (83.1048)  acc5: 97.6000 (97.0667)  time: 0.2162  data: 0.0520  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1423 (1.0847)  acc1: 78.0000 (81.1355)  acc5: 95.6000 (95.7677)  time: 0.2449  data: 0.0809  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3318 (1.1489)  acc1: 75.2000 (79.4244)  acc5: 92.8000 (95.1707)  time: 0.2275  data: 0.0637  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3318 (1.1659)  acc1: 74.8000 (78.8800)  acc5: 92.8000 (94.9840)  time: 0.1744  data: 0.0107  max mem: 27255
Test: Total time: 0:00:13 (0.2674 s / it)
* Acc@1 79.248 Acc@5 95.082 loss 1.162
Accuracy of the model on the 50000 test images: 79.2%
Max accuracy: 79.25%
Epoch: [178]  [   0/2502]  eta: 1:31:54  lr: 0.001599  min_lr: 0.001599  loss: 2.9316 (2.9316)  weight_decay: 0.0500 (0.0500)  time: 2.2038  data: 1.8553  max mem: 27255
Epoch: [178]  [ 200/2502]  eta: 0:13:10  lr: 0.001598  min_lr: 0.001598  loss: 3.3710 (3.2414)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9131 (0.9726)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [178]  [ 400/2502]  eta: 0:11:53  lr: 0.001596  min_lr: 0.001596  loss: 3.0057 (3.2328)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8274 (0.9262)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [178]  [ 600/2502]  eta: 0:10:42  lr: 0.001594  min_lr: 0.001594  loss: 3.1895 (3.2509)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8899 (0.9133)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [178]  [ 800/2502]  eta: 0:09:32  lr: 0.001592  min_lr: 0.001592  loss: 3.2551 (3.2681)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8758 (0.9115)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [178]  [1000/2502]  eta: 0:08:24  lr: 0.001591  min_lr: 0.001591  loss: 2.9849 (3.2617)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8090 (0.8958)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [178]  [1200/2502]  eta: 0:07:17  lr: 0.001589  min_lr: 0.001589  loss: 3.1541 (3.2679)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9202 (0.9023)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [178]  [1400/2502]  eta: 0:06:09  lr: 0.001587  min_lr: 0.001587  loss: 3.6251 (3.2625)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8116 (0.8936)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [178]  [1600/2502]  eta: 0:05:02  lr: 0.001585  min_lr: 0.001585  loss: 3.3366 (3.2657)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8870 (0.8927)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [178]  [1800/2502]  eta: 0:03:55  lr: 0.001584  min_lr: 0.001584  loss: 3.5111 (3.2738)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8183 (0.8952)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [178]  [2000/2502]  eta: 0:02:48  lr: 0.001582  min_lr: 0.001582  loss: 3.1418 (3.2783)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9583 (0.8948)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [178]  [2200/2502]  eta: 0:01:41  lr: 0.001580  min_lr: 0.001580  loss: 3.4754 (3.2751)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8501 (0.8926)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [178]  [2400/2502]  eta: 0:00:34  lr: 0.001578  min_lr: 0.001578  loss: 3.3585 (3.2773)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8691 (0.8943)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [178]  [2501/2502]  eta: 0:00:00  lr: 0.001578  min_lr: 0.001578  loss: 3.2911 (3.2782)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8825 (0.8955)  time: 0.3006  data: 0.0008  max mem: 27255
Epoch: [178] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.001578  min_lr: 0.001578  loss: 3.2911 (3.2886)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8825 (0.8955)
Test:  [ 0/50]  eta: 0:02:17  loss: 0.5782 (0.5782)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 2.7537  data: 2.5519  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.8675 (0.8788)  acc1: 84.4000 (85.3091)  acc5: 96.8000 (96.8000)  time: 0.4504  data: 0.2745  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8725 (0.9005)  acc1: 83.2000 (83.8667)  acc5: 96.8000 (96.8952)  time: 0.2300  data: 0.0609  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0572 (0.9956)  acc1: 79.6000 (81.7806)  acc5: 95.6000 (95.8194)  time: 0.2247  data: 0.0603  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2299 (1.0535)  acc1: 74.4000 (80.0195)  acc5: 92.8000 (95.0049)  time: 0.2028  data: 0.0390  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2402 (1.0766)  acc1: 74.0000 (79.2960)  acc5: 92.8000 (94.8000)  time: 0.1848  data: 0.0211  max mem: 27255
Test: Total time: 0:00:13 (0.2615 s / it)
* Acc@1 79.020 Acc@5 94.916 loss 1.077
Accuracy of the model on the 50000 test images: 79.0%
Max accuracy: 79.25%
Epoch: [179]  [   0/2502]  eta: 1:22:32  lr: 0.001577  min_lr: 0.001577  loss: 3.7151 (3.7151)  weight_decay: 0.0500 (0.0500)  time: 1.9795  data: 1.5023  max mem: 27255
Epoch: [179]  [ 200/2502]  eta: 0:13:08  lr: 0.001576  min_lr: 0.001576  loss: 3.6396 (3.3064)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8900 (0.8768)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [179]  [ 400/2502]  eta: 0:11:51  lr: 0.001574  min_lr: 0.001574  loss: 3.2672 (3.2884)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8813 (0.8889)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [179]  [ 600/2502]  eta: 0:10:41  lr: 0.001572  min_lr: 0.001572  loss: 3.4813 (3.2960)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8743 (0.8863)  time: 0.3426  data: 0.0004  max mem: 27255
Epoch: [179]  [ 800/2502]  eta: 0:09:33  lr: 0.001570  min_lr: 0.001570  loss: 3.2693 (3.2863)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8332 (0.8926)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [179]  [1000/2502]  eta: 0:08:26  lr: 0.001569  min_lr: 0.001569  loss: 3.1617 (3.2714)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9130 (0.8941)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [179]  [1200/2502]  eta: 0:07:18  lr: 0.001567  min_lr: 0.001567  loss: 3.2013 (3.2718)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9006 (0.8953)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [179]  [1400/2502]  eta: 0:06:10  lr: 0.001565  min_lr: 0.001565  loss: 3.4733 (3.2818)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9473 (0.9042)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [179]  [1600/2502]  eta: 0:05:03  lr: 0.001563  min_lr: 0.001563  loss: 3.3139 (3.2808)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8515 (0.8998)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [179]  [1800/2502]  eta: 0:03:55  lr: 0.001562  min_lr: 0.001562  loss: 3.2228 (3.2815)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8591 (0.8995)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [179]  [2000/2502]  eta: 0:02:48  lr: 0.001560  min_lr: 0.001560  loss: 3.2982 (3.2844)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8828 (0.8966)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [179]  [2200/2502]  eta: 0:01:41  lr: 0.001558  min_lr: 0.001558  loss: 3.3654 (3.2769)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8425 (0.8960)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [179]  [2400/2502]  eta: 0:00:34  lr: 0.001556  min_lr: 0.001556  loss: 3.5464 (3.2771)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9427 (0.8966)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [179]  [2501/2502]  eta: 0:00:00  lr: 0.001556  min_lr: 0.001556  loss: 2.8025 (3.2748)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9180 (0.8983)  time: 0.3000  data: 0.0009  max mem: 27255
Epoch: [179] Total time: 0:13:59 (0.3353 s / it)
Averaged stats: lr: 0.001556  min_lr: 0.001556  loss: 2.8025 (3.2859)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9180 (0.8983)
Test:  [ 0/50]  eta: 0:02:35  loss: 0.5320 (0.5320)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 3.1079  data: 2.9068  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.8189 (0.8507)  acc1: 87.2000 (84.7636)  acc5: 97.6000 (97.0546)  time: 0.4761  data: 0.3088  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8608 (0.8729)  acc1: 83.2000 (83.9048)  acc5: 97.2000 (97.2571)  time: 0.2105  data: 0.0466  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0491 (0.9720)  acc1: 78.4000 (81.6645)  acc5: 95.2000 (95.8323)  time: 0.1934  data: 0.0295  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2118 (1.0380)  acc1: 74.0000 (80.0000)  acc5: 92.8000 (95.0732)  time: 0.1805  data: 0.0166  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1979 (1.0581)  acc1: 74.0000 (79.3040)  acc5: 92.8000 (94.9760)  time: 0.1731  data: 0.0093  max mem: 27255
Test: Total time: 0:00:12 (0.2500 s / it)
* Acc@1 79.282 Acc@5 95.056 loss 1.060
Accuracy of the model on the 50000 test images: 79.3%
Max accuracy: 79.28%
Epoch: [180]  [   0/2502]  eta: 1:18:41  lr: 0.001556  min_lr: 0.001556  loss: 2.0554 (2.0554)  weight_decay: 0.0500 (0.0500)  time: 1.8872  data: 1.5382  max mem: 27255
Epoch: [180]  [ 200/2502]  eta: 0:13:08  lr: 0.001554  min_lr: 0.001554  loss: 3.0620 (3.2215)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8160 (0.8449)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [180]  [ 400/2502]  eta: 0:11:53  lr: 0.001552  min_lr: 0.001552  loss: 3.3784 (3.2400)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8628 (0.8851)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [180]  [ 600/2502]  eta: 0:10:43  lr: 0.001550  min_lr: 0.001550  loss: 3.5697 (3.2663)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8453 (0.8759)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [180]  [ 800/2502]  eta: 0:09:34  lr: 0.001549  min_lr: 0.001549  loss: 3.1019 (3.2521)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8171 (0.8723)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [180]  [1000/2502]  eta: 0:08:25  lr: 0.001547  min_lr: 0.001547  loss: 3.0267 (3.2402)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8728 (0.8706)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [180]  [1200/2502]  eta: 0:07:18  lr: 0.001545  min_lr: 0.001545  loss: 3.2971 (3.2485)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9059 (0.8723)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [180]  [1400/2502]  eta: 0:06:10  lr: 0.001543  min_lr: 0.001543  loss: 3.4179 (3.2478)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8246 (0.8823)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [180]  [1600/2502]  eta: 0:05:03  lr: 0.001542  min_lr: 0.001542  loss: 3.5856 (3.2582)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7902 (0.8799)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [180]  [1800/2502]  eta: 0:03:55  lr: 0.001540  min_lr: 0.001540  loss: 3.3397 (3.2583)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8157 (0.8770)  time: 0.3439  data: 0.0005  max mem: 27255
Epoch: [180]  [2000/2502]  eta: 0:02:48  lr: 0.001538  min_lr: 0.001538  loss: 3.3167 (3.2625)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8812 (0.8801)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [180]  [2200/2502]  eta: 0:01:41  lr: 0.001536  min_lr: 0.001536  loss: 3.3485 (3.2707)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7942 (0.8808)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [180]  [2400/2502]  eta: 0:00:34  lr: 0.001535  min_lr: 0.001535  loss: 3.2013 (3.2720)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8212 (0.8801)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [180]  [2501/2502]  eta: 0:00:00  lr: 0.001534  min_lr: 0.001534  loss: 3.3723 (3.2715)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9056 (0.8815)  time: 0.3007  data: 0.0007  max mem: 27255
Epoch: [180] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.001534  min_lr: 0.001534  loss: 3.3723 (3.2803)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9056 (0.8815)
Test:  [ 0/50]  eta: 0:02:57  loss: 0.6774 (0.6774)  acc1: 90.0000 (90.0000)  acc5: 99.6000 (99.6000)  time: 3.5553  data: 3.3577  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.8806 (0.9337)  acc1: 85.6000 (84.7636)  acc5: 97.6000 (97.0182)  time: 0.4880  data: 0.3203  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9543 (0.9498)  acc1: 82.8000 (83.7524)  acc5: 97.6000 (97.0667)  time: 0.1837  data: 0.0193  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0733 (1.0475)  acc1: 79.2000 (81.6000)  acc5: 94.4000 (95.6903)  time: 0.1927  data: 0.0286  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2973 (1.1191)  acc1: 75.2000 (79.8342)  acc5: 92.4000 (94.8976)  time: 0.1909  data: 0.0269  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2543 (1.1310)  acc1: 74.8000 (79.3520)  acc5: 93.6000 (94.8000)  time: 0.1905  data: 0.0267  max mem: 27255
Test: Total time: 0:00:12 (0.2526 s / it)
* Acc@1 79.152 Acc@5 95.002 loss 1.130
Accuracy of the model on the 50000 test images: 79.2%
Max accuracy: 79.28%
Epoch: [181]  [   0/2502]  eta: 1:23:36  lr: 0.001534  min_lr: 0.001534  loss: 3.5650 (3.5650)  weight_decay: 0.0500 (0.0500)  time: 2.0048  data: 1.5709  max mem: 27255
Epoch: [181]  [ 200/2502]  eta: 0:13:08  lr: 0.001532  min_lr: 0.001532  loss: 3.5663 (3.3262)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9066 (0.9021)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [181]  [ 400/2502]  eta: 0:11:50  lr: 0.001530  min_lr: 0.001530  loss: 2.8177 (3.3018)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8435 (0.9158)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [181]  [ 600/2502]  eta: 0:10:40  lr: 0.001528  min_lr: 0.001528  loss: 3.4132 (3.2969)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8646 (0.9058)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [181]  [ 800/2502]  eta: 0:09:32  lr: 0.001527  min_lr: 0.001527  loss: 3.4065 (3.3007)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8098 (0.8937)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [181]  [1000/2502]  eta: 0:08:24  lr: 0.001525  min_lr: 0.001525  loss: 3.4037 (3.2890)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8830 (0.8995)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [181]  [1200/2502]  eta: 0:07:17  lr: 0.001523  min_lr: 0.001523  loss: 3.2763 (3.2875)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8725 (0.9015)  time: 0.3360  data: 0.0004  max mem: 27255
Epoch: [181]  [1400/2502]  eta: 0:06:10  lr: 0.001522  min_lr: 0.001522  loss: 3.5408 (3.2895)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8388 (0.9004)  time: 0.3418  data: 0.0005  max mem: 27255
Epoch: [181]  [1600/2502]  eta: 0:05:02  lr: 0.001520  min_lr: 0.001520  loss: 3.3277 (3.2839)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9570 (0.9028)  time: 0.3355  data: 0.0005  max mem: 27255
Epoch: [181]  [1800/2502]  eta: 0:03:55  lr: 0.001518  min_lr: 0.001518  loss: 3.3201 (3.2827)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9116 (0.9044)  time: 0.3385  data: 0.0003  max mem: 27255
Epoch: [181]  [2000/2502]  eta: 0:02:48  lr: 0.001516  min_lr: 0.001516  loss: 3.4162 (3.2849)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8189 (0.9014)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [181]  [2200/2502]  eta: 0:01:41  lr: 0.001515  min_lr: 0.001515  loss: 3.4529 (3.2784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8687 (0.8984)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [181]  [2400/2502]  eta: 0:00:34  lr: 0.001513  min_lr: 0.001513  loss: 3.4666 (3.2812)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8369 (0.8977)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [181]  [2501/2502]  eta: 0:00:00  lr: 0.001512  min_lr: 0.001512  loss: 3.1957 (3.2816)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9946 (inf)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [181] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.001512  min_lr: 0.001512  loss: 3.1957 (3.2784)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9946 (inf)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.5002 (0.5002)  acc1: 93.6000 (93.6000)  acc5: 99.2000 (99.2000)  time: 2.6431  data: 2.4543  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8267 (0.8433)  acc1: 86.0000 (85.3091)  acc5: 97.6000 (97.5273)  time: 0.4263  data: 0.2604  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8459 (0.8728)  acc1: 83.2000 (83.6762)  acc5: 97.6000 (97.3714)  time: 0.1887  data: 0.0245  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0433 (0.9713)  acc1: 79.2000 (81.7419)  acc5: 94.8000 (96.0645)  time: 0.2180  data: 0.0527  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2082 (1.0371)  acc1: 76.0000 (80.0781)  acc5: 93.2000 (95.2098)  time: 0.2195  data: 0.0540  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2318 (1.0577)  acc1: 74.0000 (79.3680)  acc5: 93.6000 (95.0160)  time: 0.1730  data: 0.0053  max mem: 27255
Test: Total time: 0:00:12 (0.2488 s / it)
* Acc@1 79.498 Acc@5 95.088 loss 1.060
Accuracy of the model on the 50000 test images: 79.5%
Max accuracy: 79.50%
Epoch: [182]  [   0/2502]  eta: 1:17:07  lr: 0.001512  min_lr: 0.001512  loss: 2.2496 (2.2496)  weight_decay: 0.0500 (0.0500)  time: 1.8495  data: 1.5108  max mem: 27255
Epoch: [182]  [ 200/2502]  eta: 0:13:06  lr: 0.001510  min_lr: 0.001510  loss: 3.3922 (3.2637)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9538 (0.9590)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [182]  [ 400/2502]  eta: 0:11:52  lr: 0.001508  min_lr: 0.001508  loss: 3.2702 (3.2687)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8448 (0.9228)  time: 0.3473  data: 0.0005  max mem: 27255
Epoch: [182]  [ 600/2502]  eta: 0:10:42  lr: 0.001507  min_lr: 0.001507  loss: 3.6513 (3.2710)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8503 (0.9211)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [182]  [ 800/2502]  eta: 0:09:33  lr: 0.001505  min_lr: 0.001505  loss: 3.2424 (3.2723)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8651 (0.9228)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [182]  [1000/2502]  eta: 0:08:25  lr: 0.001503  min_lr: 0.001503  loss: 3.3778 (3.2710)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8262 (0.9220)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [182]  [1200/2502]  eta: 0:07:17  lr: 0.001502  min_lr: 0.001502  loss: 3.4912 (3.2714)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8906 (0.9191)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [182]  [1400/2502]  eta: 0:06:10  lr: 0.001500  min_lr: 0.001500  loss: 3.4891 (3.2777)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9115 (0.9181)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [182]  [1600/2502]  eta: 0:05:02  lr: 0.001498  min_lr: 0.001498  loss: 3.3517 (3.2786)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9009 (0.9190)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [182]  [1800/2502]  eta: 0:03:55  lr: 0.001496  min_lr: 0.001496  loss: 3.4475 (3.2816)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9486 (0.9201)  time: 0.3360  data: 0.0004  max mem: 27255
Epoch: [182]  [2000/2502]  eta: 0:02:48  lr: 0.001495  min_lr: 0.001495  loss: 3.3429 (3.2877)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8264 (0.9154)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [182]  [2200/2502]  eta: 0:01:41  lr: 0.001493  min_lr: 0.001493  loss: 3.3614 (3.2907)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9518 (0.9165)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [182]  [2400/2502]  eta: 0:00:34  lr: 0.001491  min_lr: 0.001491  loss: 3.3069 (3.2830)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9339 (0.9170)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [182]  [2501/2502]  eta: 0:00:00  lr: 0.001490  min_lr: 0.001490  loss: 3.3919 (3.2814)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9931 (0.9193)  time: 0.3004  data: 0.0009  max mem: 27255
Epoch: [182] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.001490  min_lr: 0.001490  loss: 3.3919 (3.2755)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9931 (0.9193)
Test:  [ 0/50]  eta: 0:02:31  loss: 0.6571 (0.6571)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.0304  data: 2.8417  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9137 (0.9517)  acc1: 86.0000 (84.9455)  acc5: 97.2000 (97.2364)  time: 0.4423  data: 0.2754  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9449 (0.9746)  acc1: 83.2000 (83.9429)  acc5: 97.2000 (97.2381)  time: 0.1819  data: 0.0176  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0551 (1.0743)  acc1: 79.2000 (81.9871)  acc5: 94.8000 (96.0516)  time: 0.1979  data: 0.0340  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3398 (1.1438)  acc1: 74.8000 (80.1659)  acc5: 93.2000 (95.4244)  time: 0.2215  data: 0.0577  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2997 (1.1653)  acc1: 74.8000 (79.5040)  acc5: 93.2000 (95.1920)  time: 0.2096  data: 0.0438  max mem: 27255
Test: Total time: 0:00:12 (0.2589 s / it)
* Acc@1 79.568 Acc@5 95.276 loss 1.164
Accuracy of the model on the 50000 test images: 79.6%
Max accuracy: 79.57%
Epoch: [183]  [   0/2502]  eta: 1:35:01  lr: 0.001490  min_lr: 0.001490  loss: 4.1115 (4.1115)  weight_decay: 0.0500 (0.0500)  time: 2.2786  data: 1.9367  max mem: 27255
Epoch: [183]  [ 200/2502]  eta: 0:13:14  lr: 0.001488  min_lr: 0.001488  loss: 3.2925 (3.2404)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8520 (0.9181)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [183]  [ 400/2502]  eta: 0:11:53  lr: 0.001487  min_lr: 0.001487  loss: 3.3870 (3.2371)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8662 (0.9152)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [183]  [ 600/2502]  eta: 0:10:41  lr: 0.001485  min_lr: 0.001485  loss: 3.2713 (3.2350)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9335 (0.9125)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [183]  [ 800/2502]  eta: 0:09:33  lr: 0.001483  min_lr: 0.001483  loss: 3.4123 (3.2454)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9224 (0.9199)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [183]  [1000/2502]  eta: 0:08:24  lr: 0.001482  min_lr: 0.001482  loss: 3.4087 (3.2396)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9141 (0.9177)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [183]  [1200/2502]  eta: 0:07:17  lr: 0.001480  min_lr: 0.001480  loss: 3.2830 (3.2412)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8959 (0.9222)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [183]  [1400/2502]  eta: 0:06:09  lr: 0.001478  min_lr: 0.001478  loss: 3.2357 (3.2472)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8928 (0.9178)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [183]  [1600/2502]  eta: 0:05:03  lr: 0.001476  min_lr: 0.001476  loss: 3.6481 (3.2538)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8728 (0.9114)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [183]  [1800/2502]  eta: 0:03:55  lr: 0.001475  min_lr: 0.001475  loss: 3.5168 (3.2593)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9531 (0.9155)  time: 0.3359  data: 0.0005  max mem: 27255
Epoch: [183]  [2000/2502]  eta: 0:02:48  lr: 0.001473  min_lr: 0.001473  loss: 3.2832 (3.2594)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8422 (0.9132)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [183]  [2200/2502]  eta: 0:01:41  lr: 0.001471  min_lr: 0.001471  loss: 3.1999 (3.2635)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8551 (0.9130)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [183]  [2400/2502]  eta: 0:00:34  lr: 0.001469  min_lr: 0.001469  loss: 3.3437 (3.2614)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9172 (0.9141)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [183]  [2501/2502]  eta: 0:00:00  lr: 0.001469  min_lr: 0.001469  loss: 3.1193 (3.2608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9225 (0.9162)  time: 0.2999  data: 0.0008  max mem: 27255
Epoch: [183] Total time: 0:13:59 (0.3353 s / it)
Averaged stats: lr: 0.001469  min_lr: 0.001469  loss: 3.1193 (3.2609)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9225 (0.9162)
Test:  [ 0/50]  eta: 0:02:11  loss: 0.5402 (0.5402)  acc1: 92.0000 (92.0000)  acc5: 99.2000 (99.2000)  time: 2.6270  data: 2.4207  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8631 (0.8877)  acc1: 85.6000 (85.0182)  acc5: 97.2000 (97.1636)  time: 0.4404  data: 0.2725  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8789 (0.9143)  acc1: 82.4000 (83.8286)  acc5: 97.6000 (97.2762)  time: 0.2253  data: 0.0613  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0706 (1.0153)  acc1: 78.4000 (81.7290)  acc5: 94.8000 (95.8323)  time: 0.2030  data: 0.0390  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2625 (1.0746)  acc1: 76.0000 (80.0000)  acc5: 92.8000 (95.2585)  time: 0.1873  data: 0.0234  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2478 (1.0945)  acc1: 74.8000 (79.4480)  acc5: 93.6000 (95.1520)  time: 0.1814  data: 0.0170  max mem: 27255
Test: Total time: 0:00:12 (0.2493 s / it)
* Acc@1 79.396 Acc@5 95.192 loss 1.095
Accuracy of the model on the 50000 test images: 79.4%
Max accuracy: 79.57%
Epoch: [184]  [   0/2502]  eta: 1:32:37  lr: 0.001469  min_lr: 0.001469  loss: 4.0780 (4.0780)  weight_decay: 0.0500 (0.0500)  time: 2.2212  data: 1.8784  max mem: 27255
Epoch: [184]  [ 200/2502]  eta: 0:13:10  lr: 0.001467  min_lr: 0.001467  loss: 3.3256 (3.2385)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8493 (0.8881)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [184]  [ 400/2502]  eta: 0:11:53  lr: 0.001465  min_lr: 0.001465  loss: 3.4973 (3.2561)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8413 (0.8874)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [184]  [ 600/2502]  eta: 0:10:41  lr: 0.001463  min_lr: 0.001463  loss: 3.0503 (3.2471)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9169 (0.8931)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [184]  [ 800/2502]  eta: 0:09:33  lr: 0.001462  min_lr: 0.001462  loss: 3.3629 (3.2251)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8814 (0.8992)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [184]  [1000/2502]  eta: 0:08:24  lr: 0.001460  min_lr: 0.001460  loss: 3.5751 (3.2533)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8574 (0.9080)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [184]  [1200/2502]  eta: 0:07:17  lr: 0.001458  min_lr: 0.001458  loss: 2.9538 (3.2601)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9387 (0.9131)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [184]  [1400/2502]  eta: 0:06:10  lr: 0.001456  min_lr: 0.001456  loss: 3.0428 (3.2678)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9487 (0.9166)  time: 0.3352  data: 0.0003  max mem: 27255
Epoch: [184]  [1600/2502]  eta: 0:05:02  lr: 0.001455  min_lr: 0.001455  loss: 3.2549 (3.2718)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9454 (0.9191)  time: 0.3425  data: 0.0004  max mem: 27255
Epoch: [184]  [1800/2502]  eta: 0:03:55  lr: 0.001453  min_lr: 0.001453  loss: 3.2625 (3.2708)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8926 (0.9221)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [184]  [2000/2502]  eta: 0:02:48  lr: 0.001451  min_lr: 0.001451  loss: 3.3222 (3.2746)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8809 (0.9182)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [184]  [2200/2502]  eta: 0:01:41  lr: 0.001450  min_lr: 0.001450  loss: 3.2742 (3.2744)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9331 (0.9236)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [184]  [2400/2502]  eta: 0:00:34  lr: 0.001448  min_lr: 0.001448  loss: 3.5119 (3.2748)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8540 (0.9211)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [184]  [2501/2502]  eta: 0:00:00  lr: 0.001447  min_lr: 0.001447  loss: 3.1635 (3.2740)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8299 (0.9201)  time: 0.3006  data: 0.0008  max mem: 27255
Epoch: [184] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.001447  min_lr: 0.001447  loss: 3.1635 (3.2662)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8299 (0.9201)
Test:  [ 0/50]  eta: 0:02:26  loss: 0.5652 (0.5652)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 2.9222  data: 2.7262  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8811 (0.8655)  acc1: 86.4000 (85.3818)  acc5: 97.6000 (96.8364)  time: 0.4299  data: 0.2630  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8905 (0.8908)  acc1: 83.2000 (84.3429)  acc5: 97.6000 (97.0286)  time: 0.2017  data: 0.0320  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0290 (0.9876)  acc1: 79.2000 (82.0645)  acc5: 94.8000 (95.7419)  time: 0.2399  data: 0.0698  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2317 (1.0475)  acc1: 75.2000 (80.4000)  acc5: 92.8000 (95.1902)  time: 0.2545  data: 0.0902  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2317 (1.0693)  acc1: 74.8000 (79.7040)  acc5: 93.6000 (95.0880)  time: 0.2078  data: 0.0441  max mem: 27255
Test: Total time: 0:00:13 (0.2720 s / it)
* Acc@1 79.706 Acc@5 95.170 loss 1.069
Accuracy of the model on the 50000 test images: 79.7%
Max accuracy: 79.71%
Epoch: [185]  [   0/2502]  eta: 1:21:12  lr: 0.001447  min_lr: 0.001447  loss: 3.5243 (3.5243)  weight_decay: 0.0500 (0.0500)  time: 1.9473  data: 1.5985  max mem: 27255
Epoch: [185]  [ 200/2502]  eta: 0:13:13  lr: 0.001445  min_lr: 0.001445  loss: 3.4304 (3.3215)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9573 (0.9178)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [185]  [ 400/2502]  eta: 0:11:55  lr: 0.001444  min_lr: 0.001444  loss: 3.0294 (3.2803)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9421 (0.9317)  time: 0.3433  data: 0.0005  max mem: 27255
Epoch: [185]  [ 600/2502]  eta: 0:10:43  lr: 0.001442  min_lr: 0.001442  loss: 3.5348 (3.2823)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9525 (0.9452)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [185]  [ 800/2502]  eta: 0:09:35  lr: 0.001440  min_lr: 0.001440  loss: 3.4534 (3.2890)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8899 (0.9382)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [185]  [1000/2502]  eta: 0:08:26  lr: 0.001438  min_lr: 0.001438  loss: 3.3339 (3.2884)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8903 (0.9356)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [185]  [1200/2502]  eta: 0:07:18  lr: 0.001437  min_lr: 0.001437  loss: 3.4883 (3.2832)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8327 (0.9285)  time: 0.3350  data: 0.0003  max mem: 27255
Epoch: [185]  [1400/2502]  eta: 0:06:10  lr: 0.001435  min_lr: 0.001435  loss: 3.5054 (3.2844)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0049 (0.9275)  time: 0.3426  data: 0.0004  max mem: 27255
Epoch: [185]  [1600/2502]  eta: 0:05:03  lr: 0.001433  min_lr: 0.001433  loss: 3.2073 (3.2772)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8811 (0.9259)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [185]  [1800/2502]  eta: 0:03:55  lr: 0.001431  min_lr: 0.001431  loss: 3.4434 (3.2765)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8902 (0.9243)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [185]  [2000/2502]  eta: 0:02:48  lr: 0.001430  min_lr: 0.001430  loss: 3.0882 (3.2746)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8487 (0.9231)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [185]  [2200/2502]  eta: 0:01:41  lr: 0.001428  min_lr: 0.001428  loss: 3.3482 (3.2723)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.7905 (0.9216)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [185]  [2400/2502]  eta: 0:00:34  lr: 0.001426  min_lr: 0.001426  loss: 3.4146 (3.2741)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8620 (0.9238)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [185]  [2501/2502]  eta: 0:00:00  lr: 0.001425  min_lr: 0.001425  loss: 3.4515 (3.2751)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8914 (0.9240)  time: 0.3009  data: 0.0009  max mem: 27255
Epoch: [185] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.001425  min_lr: 0.001425  loss: 3.4515 (3.2635)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8914 (0.9240)
Test:  [ 0/50]  eta: 0:02:43  loss: 0.5770 (0.5770)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 3.2612  data: 3.0603  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.9417 (0.9020)  acc1: 84.0000 (84.8000)  acc5: 97.2000 (96.9818)  time: 0.4758  data: 0.3085  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9300 (0.9176)  acc1: 82.8000 (83.9429)  acc5: 97.2000 (97.2571)  time: 0.1886  data: 0.0246  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0411 (1.0147)  acc1: 79.2000 (81.9355)  acc5: 94.4000 (95.9484)  time: 0.1922  data: 0.0280  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2489 (1.0742)  acc1: 76.4000 (80.3220)  acc5: 93.2000 (95.3561)  time: 0.2152  data: 0.0511  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2268 (1.0909)  acc1: 75.2000 (79.7120)  acc5: 93.6000 (95.2720)  time: 0.1950  data: 0.0312  max mem: 27255
Test: Total time: 0:00:12 (0.2582 s / it)
* Acc@1 79.672 Acc@5 95.194 loss 1.094
Accuracy of the model on the 50000 test images: 79.7%
Max accuracy: 79.71%
Epoch: [186]  [   0/2502]  eta: 1:33:16  lr: 0.001425  min_lr: 0.001425  loss: 2.2934 (2.2934)  weight_decay: 0.0500 (0.0500)  time: 2.2369  data: 1.3398  max mem: 27255
Epoch: [186]  [ 200/2502]  eta: 0:13:12  lr: 0.001424  min_lr: 0.001424  loss: 3.4482 (3.2206)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9013 (0.9467)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [186]  [ 400/2502]  eta: 0:11:54  lr: 0.001422  min_lr: 0.001422  loss: 3.3798 (3.2249)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9505 (0.9399)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [186]  [ 600/2502]  eta: 0:10:43  lr: 0.001420  min_lr: 0.001420  loss: 3.0854 (3.2530)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8880 (0.9326)  time: 0.3359  data: 0.0004  max mem: 27255
Epoch: [186]  [ 800/2502]  eta: 0:09:33  lr: 0.001419  min_lr: 0.001419  loss: 3.2998 (3.2516)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8447 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [186]  [1000/2502]  eta: 0:08:25  lr: 0.001417  min_lr: 0.001417  loss: 3.3958 (3.2589)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9272 (inf)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [186]  [1200/2502]  eta: 0:07:17  lr: 0.001415  min_lr: 0.001415  loss: 3.2743 (3.2501)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9187 (inf)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [186]  [1400/2502]  eta: 0:06:10  lr: 0.001413  min_lr: 0.001413  loss: 3.3597 (3.2532)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8529 (inf)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [186]  [1600/2502]  eta: 0:05:02  lr: 0.001412  min_lr: 0.001412  loss: 3.4649 (3.2541)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9868 (inf)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [186]  [1800/2502]  eta: 0:03:55  lr: 0.001410  min_lr: 0.001410  loss: 3.3941 (3.2585)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9216 (inf)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [186]  [2000/2502]  eta: 0:02:48  lr: 0.001408  min_lr: 0.001408  loss: 3.3713 (3.2600)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9178 (inf)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [186]  [2200/2502]  eta: 0:01:41  lr: 0.001407  min_lr: 0.001407  loss: 3.2669 (3.2637)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8903 (inf)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [186]  [2400/2502]  eta: 0:00:34  lr: 0.001405  min_lr: 0.001405  loss: 3.2911 (3.2604)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8875 (inf)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [186]  [2501/2502]  eta: 0:00:00  lr: 0.001404  min_lr: 0.001404  loss: 2.7791 (3.2608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8861 (inf)  time: 0.2999  data: 0.0010  max mem: 27255
Epoch: [186] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.001404  min_lr: 0.001404  loss: 2.7791 (3.2552)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8861 (inf)
Test:  [ 0/50]  eta: 0:02:16  loss: 0.5375 (0.5375)  acc1: 92.8000 (92.8000)  acc5: 99.2000 (99.2000)  time: 2.7204  data: 2.5159  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.8060 (0.8139)  acc1: 86.8000 (85.4545)  acc5: 97.2000 (97.2000)  time: 0.4573  data: 0.2898  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.8178 (0.8330)  acc1: 82.0000 (84.2095)  acc5: 97.2000 (97.2571)  time: 0.2504  data: 0.0867  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9849 (0.9259)  acc1: 80.0000 (82.3484)  acc5: 96.0000 (96.0387)  time: 0.2499  data: 0.0862  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1464 (0.9839)  acc1: 76.0000 (80.5366)  acc5: 93.2000 (95.5220)  time: 0.1972  data: 0.0336  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1386 (1.0033)  acc1: 74.8000 (79.8640)  acc5: 93.6000 (95.3280)  time: 0.1640  data: 0.0005  max mem: 27255
Test: Total time: 0:00:13 (0.2652 s / it)
* Acc@1 79.710 Acc@5 95.298 loss 1.008
Accuracy of the model on the 50000 test images: 79.7%
Max accuracy: 79.71%
Epoch: [187]  [   0/2502]  eta: 1:21:50  lr: 0.001404  min_lr: 0.001404  loss: 3.7557 (3.7557)  weight_decay: 0.0500 (0.0500)  time: 1.9628  data: 1.6116  max mem: 27255
Epoch: [187]  [ 200/2502]  eta: 0:13:12  lr: 0.001402  min_lr: 0.001402  loss: 3.3409 (3.2465)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8800 (0.9395)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [187]  [ 400/2502]  eta: 0:11:52  lr: 0.001401  min_lr: 0.001401  loss: 3.2385 (3.2196)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8982 (0.9366)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [187]  [ 600/2502]  eta: 0:10:41  lr: 0.001399  min_lr: 0.001399  loss: 3.3590 (3.2201)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8532 (0.9393)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [187]  [ 800/2502]  eta: 0:09:32  lr: 0.001397  min_lr: 0.001397  loss: 3.3530 (3.2302)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8345 (0.9374)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [187]  [1000/2502]  eta: 0:08:24  lr: 0.001395  min_lr: 0.001395  loss: 3.4870 (3.2481)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8568 (0.9284)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [187]  [1200/2502]  eta: 0:07:16  lr: 0.001394  min_lr: 0.001394  loss: 3.3681 (3.2427)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9813 (0.9343)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [187]  [1400/2502]  eta: 0:06:09  lr: 0.001392  min_lr: 0.001392  loss: 3.4032 (3.2467)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8789 (0.9379)  time: 0.3445  data: 0.0004  max mem: 27255
Epoch: [187]  [1600/2502]  eta: 0:05:02  lr: 0.001390  min_lr: 0.001390  loss: 3.4519 (3.2573)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9035 (0.9396)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [187]  [1800/2502]  eta: 0:03:55  lr: 0.001389  min_lr: 0.001389  loss: 3.4617 (3.2592)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9462 (0.9370)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [187]  [2000/2502]  eta: 0:02:48  lr: 0.001387  min_lr: 0.001387  loss: 3.4574 (3.2613)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9086 (0.9358)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [187]  [2200/2502]  eta: 0:01:41  lr: 0.001385  min_lr: 0.001385  loss: 3.3548 (3.2573)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9268 (0.9364)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [187]  [2400/2502]  eta: 0:00:34  lr: 0.001383  min_lr: 0.001383  loss: 3.4375 (3.2565)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9160 (0.9373)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [187]  [2501/2502]  eta: 0:00:00  lr: 0.001383  min_lr: 0.001383  loss: 3.4455 (3.2542)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9688 (0.9382)  time: 0.3004  data: 0.0008  max mem: 27255
Epoch: [187] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.001383  min_lr: 0.001383  loss: 3.4455 (3.2512)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9688 (0.9382)
Test:  [ 0/50]  eta: 0:02:11  loss: 0.6130 (0.6130)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 2.6377  data: 2.4378  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.9270 (0.9201)  acc1: 85.6000 (85.6364)  acc5: 96.8000 (96.9818)  time: 0.4858  data: 0.3186  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.9270 (0.9396)  acc1: 83.6000 (84.2286)  acc5: 97.2000 (97.1429)  time: 0.2697  data: 0.1058  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0606 (1.0412)  acc1: 80.0000 (82.0774)  acc5: 95.2000 (95.9355)  time: 0.2461  data: 0.0798  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2850 (1.1018)  acc1: 76.0000 (80.1366)  acc5: 92.8000 (95.3463)  time: 0.2042  data: 0.0380  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3145 (1.1256)  acc1: 75.2000 (79.5280)  acc5: 93.6000 (95.2480)  time: 0.1755  data: 0.0119  max mem: 27255
Test: Total time: 0:00:13 (0.2746 s / it)
* Acc@1 79.656 Acc@5 95.308 loss 1.123
Accuracy of the model on the 50000 test images: 79.7%
Max accuracy: 79.71%
Epoch: [188]  [   0/2502]  eta: 1:20:50  lr: 0.001383  min_lr: 0.001383  loss: 2.7277 (2.7277)  weight_decay: 0.0500 (0.0500)  time: 1.9386  data: 1.4806  max mem: 27255
Epoch: [188]  [ 200/2502]  eta: 0:13:09  lr: 0.001381  min_lr: 0.001381  loss: 3.1452 (3.1814)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0195 (0.9850)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [188]  [ 400/2502]  eta: 0:11:51  lr: 0.001379  min_lr: 0.001379  loss: 3.2287 (3.1989)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9804 (0.9635)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [188]  [ 600/2502]  eta: 0:10:40  lr: 0.001378  min_lr: 0.001378  loss: 3.3907 (3.2244)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0245 (0.9714)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [188]  [ 800/2502]  eta: 0:09:32  lr: 0.001376  min_lr: 0.001376  loss: 3.3382 (3.2109)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9000 (0.9689)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [188]  [1000/2502]  eta: 0:08:24  lr: 0.001374  min_lr: 0.001374  loss: 3.4102 (3.2266)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9385 (0.9675)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [188]  [1200/2502]  eta: 0:07:17  lr: 0.001372  min_lr: 0.001372  loss: 3.3015 (3.2251)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9496 (0.9629)  time: 0.3412  data: 0.0004  max mem: 27255
Epoch: [188]  [1400/2502]  eta: 0:06:09  lr: 0.001371  min_lr: 0.001371  loss: 3.3691 (3.2220)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8857 (0.9547)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [188]  [1600/2502]  eta: 0:05:02  lr: 0.001369  min_lr: 0.001369  loss: 3.6274 (3.2190)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9268 (0.9546)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [188]  [1800/2502]  eta: 0:03:55  lr: 0.001367  min_lr: 0.001367  loss: 3.4256 (3.2256)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9235 (0.9493)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [188]  [2000/2502]  eta: 0:02:48  lr: 0.001366  min_lr: 0.001366  loss: 3.3563 (3.2311)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9197 (0.9515)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [188]  [2200/2502]  eta: 0:01:41  lr: 0.001364  min_lr: 0.001364  loss: 3.2249 (3.2324)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9170 (0.9517)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [188]  [2400/2502]  eta: 0:00:34  lr: 0.001362  min_lr: 0.001362  loss: 3.3882 (3.2359)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9075 (0.9502)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [188]  [2501/2502]  eta: 0:00:00  lr: 0.001361  min_lr: 0.001361  loss: 3.3576 (3.2362)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9538 (0.9518)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [188] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.001361  min_lr: 0.001361  loss: 3.3576 (3.2422)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9538 (0.9518)
Test:  [ 0/50]  eta: 0:02:47  loss: 0.5932 (0.5932)  acc1: 92.8000 (92.8000)  acc5: 98.8000 (98.8000)  time: 3.3551  data: 3.1549  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.8706 (0.8794)  acc1: 84.8000 (85.1273)  acc5: 97.2000 (97.0909)  time: 0.4623  data: 0.2940  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9167 (0.9195)  acc1: 82.4000 (84.1524)  acc5: 97.2000 (97.2381)  time: 0.1869  data: 0.0224  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0977 (1.0407)  acc1: 80.4000 (81.8581)  acc5: 95.6000 (95.7806)  time: 0.1933  data: 0.0294  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2624 (1.0969)  acc1: 75.2000 (80.2537)  acc5: 92.8000 (95.3268)  time: 0.2093  data: 0.0457  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2524 (1.1179)  acc1: 74.8000 (79.5920)  acc5: 94.0000 (95.2080)  time: 0.2095  data: 0.0459  max mem: 27255
Test: Total time: 0:00:12 (0.2579 s / it)
* Acc@1 79.756 Acc@5 95.252 loss 1.116
Accuracy of the model on the 50000 test images: 79.8%
Max accuracy: 79.76%
Epoch: [189]  [   0/2502]  eta: 1:21:03  lr: 0.001361  min_lr: 0.001361  loss: 3.5187 (3.5187)  weight_decay: 0.0500 (0.0500)  time: 1.9440  data: 1.6021  max mem: 27255
Epoch: [189]  [ 200/2502]  eta: 0:13:12  lr: 0.001360  min_lr: 0.001360  loss: 3.4222 (3.1928)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9286 (0.9538)  time: 0.3453  data: 0.0004  max mem: 27255
Epoch: [189]  [ 400/2502]  eta: 0:11:52  lr: 0.001358  min_lr: 0.001358  loss: 2.9666 (3.1762)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0464 (0.9632)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [189]  [ 600/2502]  eta: 0:10:41  lr: 0.001356  min_lr: 0.001356  loss: 3.4991 (3.1985)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9151 (0.9622)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [189]  [ 800/2502]  eta: 0:09:32  lr: 0.001355  min_lr: 0.001355  loss: 3.4121 (3.2248)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8785 (0.9552)  time: 0.3367  data: 0.0005  max mem: 27255
Epoch: [189]  [1000/2502]  eta: 0:08:25  lr: 0.001353  min_lr: 0.001353  loss: 3.2438 (3.2368)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8833 (0.9594)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [189]  [1200/2502]  eta: 0:07:17  lr: 0.001351  min_lr: 0.001351  loss: 3.4428 (3.2366)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8169 (0.9526)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [189]  [1400/2502]  eta: 0:06:09  lr: 0.001349  min_lr: 0.001349  loss: 3.2739 (3.2353)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8662 (0.9504)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [189]  [1600/2502]  eta: 0:05:02  lr: 0.001348  min_lr: 0.001348  loss: 3.2077 (3.2421)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9208 (inf)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [189]  [1800/2502]  eta: 0:03:55  lr: 0.001346  min_lr: 0.001346  loss: 3.4415 (3.2451)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9914 (inf)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [189]  [2000/2502]  eta: 0:02:48  lr: 0.001344  min_lr: 0.001344  loss: 3.2679 (3.2407)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8821 (inf)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [189]  [2200/2502]  eta: 0:01:41  lr: 0.001343  min_lr: 0.001343  loss: 3.1496 (3.2393)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9102 (inf)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [189]  [2400/2502]  eta: 0:00:34  lr: 0.001341  min_lr: 0.001341  loss: 3.1843 (3.2423)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0108 (inf)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [189]  [2501/2502]  eta: 0:00:00  lr: 0.001340  min_lr: 0.001340  loss: 3.4554 (3.2428)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9663 (inf)  time: 0.2997  data: 0.0007  max mem: 27255
Epoch: [189] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.001340  min_lr: 0.001340  loss: 3.4554 (3.2366)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9663 (inf)
Test:  [ 0/50]  eta: 0:02:24  loss: 0.5156 (0.5156)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 2.8904  data: 2.6846  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8804 (0.8950)  acc1: 84.8000 (84.9091)  acc5: 97.2000 (97.3455)  time: 0.4237  data: 0.2553  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9094 (0.9139)  acc1: 82.8000 (84.1714)  acc5: 97.6000 (97.3524)  time: 0.1802  data: 0.0161  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0338 (1.0173)  acc1: 79.2000 (82.0129)  acc5: 94.8000 (95.9097)  time: 0.1992  data: 0.0355  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2560 (1.0764)  acc1: 75.6000 (80.3902)  acc5: 93.2000 (95.4049)  time: 0.2046  data: 0.0409  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2485 (1.0957)  acc1: 74.8000 (79.7840)  acc5: 94.0000 (95.2960)  time: 0.1790  data: 0.0154  max mem: 27255
Test: Total time: 0:00:12 (0.2436 s / it)
* Acc@1 79.842 Acc@5 95.298 loss 1.094
Accuracy of the model on the 50000 test images: 79.8%
Max accuracy: 79.84%
Epoch: [190]  [   0/2502]  eta: 1:15:05  lr: 0.001340  min_lr: 0.001340  loss: 3.1271 (3.1271)  weight_decay: 0.0500 (0.0500)  time: 1.8006  data: 1.4515  max mem: 27255
Epoch: [190]  [ 200/2502]  eta: 0:13:06  lr: 0.001338  min_lr: 0.001338  loss: 3.1929 (3.2087)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9830 (0.9765)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [190]  [ 400/2502]  eta: 0:11:49  lr: 0.001337  min_lr: 0.001337  loss: 3.4387 (3.2399)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9596 (0.9809)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [190]  [ 600/2502]  eta: 0:10:39  lr: 0.001335  min_lr: 0.001335  loss: 3.1788 (3.2359)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8166 (0.9642)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [190]  [ 800/2502]  eta: 0:09:31  lr: 0.001333  min_lr: 0.001333  loss: 3.1659 (3.2254)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9097 (0.9598)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [190]  [1000/2502]  eta: 0:08:23  lr: 0.001332  min_lr: 0.001332  loss: 3.0218 (3.2180)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9888 (0.9586)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [190]  [1200/2502]  eta: 0:07:16  lr: 0.001330  min_lr: 0.001330  loss: 3.0013 (3.2030)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9122 (0.9566)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [190]  [1400/2502]  eta: 0:06:09  lr: 0.001328  min_lr: 0.001328  loss: 3.3008 (3.2091)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0524 (0.9650)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [190]  [1600/2502]  eta: 0:05:02  lr: 0.001327  min_lr: 0.001327  loss: 3.3916 (3.2114)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9146 (0.9604)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [190]  [1800/2502]  eta: 0:03:54  lr: 0.001325  min_lr: 0.001325  loss: 3.1310 (3.2113)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9775 (0.9607)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [190]  [2000/2502]  eta: 0:02:48  lr: 0.001323  min_lr: 0.001323  loss: 3.2313 (3.2083)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9189 (0.9590)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [190]  [2200/2502]  eta: 0:01:41  lr: 0.001322  min_lr: 0.001322  loss: 3.3620 (3.2132)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9408 (0.9591)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [190]  [2400/2502]  eta: 0:00:34  lr: 0.001320  min_lr: 0.001320  loss: 3.2363 (3.2160)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9688 (0.9643)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [190]  [2501/2502]  eta: 0:00:00  lr: 0.001319  min_lr: 0.001319  loss: 3.2432 (3.2152)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8984 (0.9629)  time: 0.3029  data: 0.0010  max mem: 27255
Epoch: [190] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.001319  min_lr: 0.001319  loss: 3.2432 (3.2347)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8984 (0.9629)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.6316 (0.6316)  acc1: 92.8000 (92.8000)  acc5: 99.2000 (99.2000)  time: 3.0112  data: 2.8211  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9282 (0.9415)  acc1: 86.8000 (84.9455)  acc5: 97.6000 (97.0909)  time: 0.4345  data: 0.2681  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9610 (0.9646)  acc1: 83.2000 (84.1905)  acc5: 97.2000 (97.2762)  time: 0.1750  data: 0.0111  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0813 (1.0607)  acc1: 78.8000 (81.8194)  acc5: 95.2000 (95.9355)  time: 0.2067  data: 0.0429  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2663 (1.1107)  acc1: 76.0000 (80.1659)  acc5: 93.6000 (95.4342)  time: 0.2149  data: 0.0512  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2306 (1.1257)  acc1: 75.6000 (79.7040)  acc5: 94.0000 (95.2880)  time: 0.1767  data: 0.0131  max mem: 27255
Test: Total time: 0:00:12 (0.2476 s / it)
* Acc@1 79.784 Acc@5 95.244 loss 1.123
Accuracy of the model on the 50000 test images: 79.8%
Max accuracy: 79.84%
Epoch: [191]  [   0/2502]  eta: 1:29:11  lr: 0.001319  min_lr: 0.001319  loss: 2.9604 (2.9604)  weight_decay: 0.0500 (0.0500)  time: 2.1390  data: 1.7902  max mem: 27255
Epoch: [191]  [ 200/2502]  eta: 0:13:11  lr: 0.001317  min_lr: 0.001317  loss: 3.0619 (3.2377)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9042 (0.9234)  time: 0.3355  data: 0.0005  max mem: 27255
Epoch: [191]  [ 400/2502]  eta: 0:11:53  lr: 0.001316  min_lr: 0.001316  loss: 3.2959 (3.2235)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8583 (0.9210)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [191]  [ 600/2502]  eta: 0:10:42  lr: 0.001314  min_lr: 0.001314  loss: 3.3107 (3.2157)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0604 (0.9396)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [191]  [ 800/2502]  eta: 0:09:33  lr: 0.001312  min_lr: 0.001312  loss: 3.3685 (3.2134)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9820 (0.9530)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [191]  [1000/2502]  eta: 0:08:25  lr: 0.001311  min_lr: 0.001311  loss: 3.3402 (3.2223)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9348 (0.9472)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [191]  [1200/2502]  eta: 0:07:17  lr: 0.001309  min_lr: 0.001309  loss: 3.3542 (3.2222)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0034 (0.9549)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [191]  [1400/2502]  eta: 0:06:10  lr: 0.001307  min_lr: 0.001307  loss: 3.0526 (3.2121)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0235 (0.9603)  time: 0.3353  data: 0.0003  max mem: 27255
Epoch: [191]  [1600/2502]  eta: 0:05:02  lr: 0.001305  min_lr: 0.001305  loss: 3.1538 (3.2090)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8967 (0.9603)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [191]  [1800/2502]  eta: 0:03:55  lr: 0.001304  min_lr: 0.001304  loss: 3.3606 (3.2044)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9338 (0.9620)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [191]  [2000/2502]  eta: 0:02:48  lr: 0.001302  min_lr: 0.001302  loss: 3.3668 (3.2049)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9249 (0.9620)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [191]  [2200/2502]  eta: 0:01:41  lr: 0.001300  min_lr: 0.001300  loss: 3.1774 (3.2082)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8651 (0.9592)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [191]  [2400/2502]  eta: 0:00:34  lr: 0.001299  min_lr: 0.001299  loss: 3.4670 (3.2125)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9556 (0.9622)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [191]  [2501/2502]  eta: 0:00:00  lr: 0.001298  min_lr: 0.001298  loss: 3.3170 (3.2124)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8851 (0.9603)  time: 0.3003  data: 0.0008  max mem: 27255
Epoch: [191] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.001298  min_lr: 0.001298  loss: 3.3170 (3.2280)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8851 (0.9603)
Test:  [ 0/50]  eta: 0:02:24  loss: 0.6562 (0.6562)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.8811  data: 2.6852  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9386 (0.9211)  acc1: 84.4000 (84.9818)  acc5: 97.6000 (97.2364)  time: 0.4341  data: 0.2672  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9365 (0.9257)  acc1: 83.6000 (84.2476)  acc5: 97.2000 (97.3333)  time: 0.2240  data: 0.0584  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0869 (1.0189)  acc1: 80.4000 (81.9613)  acc5: 95.2000 (95.9484)  time: 0.2581  data: 0.0925  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2362 (1.0788)  acc1: 75.6000 (80.2439)  acc5: 92.8000 (95.2585)  time: 0.2127  data: 0.0486  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2154 (1.0933)  acc1: 75.6000 (79.5280)  acc5: 93.6000 (95.1760)  time: 0.1672  data: 0.0032  max mem: 27255
Test: Total time: 0:00:13 (0.2642 s / it)
* Acc@1 79.758 Acc@5 95.220 loss 1.093
Accuracy of the model on the 50000 test images: 79.8%
Max accuracy: 79.84%
Epoch: [192]  [   0/2502]  eta: 1:14:14  lr: 0.001298  min_lr: 0.001298  loss: 3.2059 (3.2059)  weight_decay: 0.0500 (0.0500)  time: 1.7803  data: 1.4557  max mem: 27255
Epoch: [192]  [ 200/2502]  eta: 0:13:06  lr: 0.001296  min_lr: 0.001296  loss: 3.3722 (3.1873)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9779 (0.9639)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [192]  [ 400/2502]  eta: 0:11:50  lr: 0.001295  min_lr: 0.001295  loss: 3.4478 (3.2057)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0133 (0.9838)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [192]  [ 600/2502]  eta: 0:10:42  lr: 0.001293  min_lr: 0.001293  loss: 3.0028 (3.2087)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8643 (0.9793)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [192]  [ 800/2502]  eta: 0:09:32  lr: 0.001291  min_lr: 0.001291  loss: 3.4519 (3.2182)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0199 (0.9863)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [192]  [1000/2502]  eta: 0:08:24  lr: 0.001290  min_lr: 0.001290  loss: 3.1897 (3.2106)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9387 (0.9852)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [192]  [1200/2502]  eta: 0:07:16  lr: 0.001288  min_lr: 0.001288  loss: 3.0708 (3.2133)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9523 (0.9768)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [192]  [1400/2502]  eta: 0:06:09  lr: 0.001286  min_lr: 0.001286  loss: 2.8203 (3.2160)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9152 (0.9717)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [192]  [1600/2502]  eta: 0:05:02  lr: 0.001285  min_lr: 0.001285  loss: 3.1476 (3.2153)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9537 (0.9713)  time: 0.3325  data: 0.0003  max mem: 27255
Epoch: [192]  [1800/2502]  eta: 0:03:55  lr: 0.001283  min_lr: 0.001283  loss: 2.9915 (3.2158)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9700 (0.9697)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [192]  [2000/2502]  eta: 0:02:48  lr: 0.001281  min_lr: 0.001281  loss: 3.4160 (3.2193)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8821 (0.9655)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [192]  [2200/2502]  eta: 0:01:41  lr: 0.001279  min_lr: 0.001279  loss: 3.2418 (3.2152)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0064 (0.9667)  time: 0.3327  data: 0.0003  max mem: 27255
Epoch: [192]  [2400/2502]  eta: 0:00:34  lr: 0.001278  min_lr: 0.001278  loss: 3.2242 (3.2148)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0080 (0.9667)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [192]  [2501/2502]  eta: 0:00:00  lr: 0.001277  min_lr: 0.001277  loss: 3.1676 (3.2142)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9709 (0.9673)  time: 0.3003  data: 0.0007  max mem: 27255
Epoch: [192] Total time: 0:13:56 (0.3344 s / it)
Averaged stats: lr: 0.001277  min_lr: 0.001277  loss: 3.1676 (3.2228)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9709 (0.9673)
Test:  [ 0/50]  eta: 0:02:39  loss: 0.6914 (0.6914)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.1803  data: 2.9807  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.8545 (0.9257)  acc1: 87.6000 (85.3455)  acc5: 97.2000 (97.0182)  time: 0.4709  data: 0.3037  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9207 (0.9517)  acc1: 83.6000 (84.2476)  acc5: 97.2000 (97.1048)  time: 0.2166  data: 0.0527  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0914 (1.0497)  acc1: 79.6000 (82.2194)  acc5: 95.6000 (95.8581)  time: 0.2012  data: 0.0373  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2575 (1.1038)  acc1: 76.8000 (80.6244)  acc5: 93.6000 (95.4439)  time: 0.1674  data: 0.0036  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2100 (1.1167)  acc1: 76.8000 (80.0480)  acc5: 94.4000 (95.4320)  time: 0.1672  data: 0.0034  max mem: 27255
Test: Total time: 0:00:12 (0.2489 s / it)
* Acc@1 79.924 Acc@5 95.418 loss 1.116
Accuracy of the model on the 50000 test images: 79.9%
Max accuracy: 79.92%
Epoch: [193]  [   0/2502]  eta: 1:22:07  lr: 0.001277  min_lr: 0.001277  loss: 3.5026 (3.5026)  weight_decay: 0.0500 (0.0500)  time: 1.9695  data: 1.6232  max mem: 27255
Epoch: [193]  [ 200/2502]  eta: 0:13:07  lr: 0.001275  min_lr: 0.001275  loss: 3.4205 (3.1945)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9405 (0.9624)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [193]  [ 400/2502]  eta: 0:11:52  lr: 0.001274  min_lr: 0.001274  loss: 2.9680 (3.1616)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9380 (0.9617)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [193]  [ 600/2502]  eta: 0:10:41  lr: 0.001272  min_lr: 0.001272  loss: 3.4891 (3.2012)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9431 (0.9636)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [193]  [ 800/2502]  eta: 0:09:32  lr: 0.001270  min_lr: 0.001270  loss: 3.5236 (3.2002)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9301 (0.9643)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [193]  [1000/2502]  eta: 0:08:24  lr: 0.001269  min_lr: 0.001269  loss: 3.3559 (3.2229)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9269 (inf)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [193]  [1200/2502]  eta: 0:07:16  lr: 0.001267  min_lr: 0.001267  loss: 3.3118 (3.2178)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9454 (inf)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [193]  [1400/2502]  eta: 0:06:09  lr: 0.001265  min_lr: 0.001265  loss: 3.4245 (3.2189)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8991 (inf)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [193]  [1600/2502]  eta: 0:05:02  lr: 0.001264  min_lr: 0.001264  loss: 3.5177 (3.2260)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8509 (inf)  time: 0.3416  data: 0.0005  max mem: 27255
Epoch: [193]  [1800/2502]  eta: 0:03:55  lr: 0.001262  min_lr: 0.001262  loss: 3.2969 (3.2190)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0232 (inf)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [193]  [2000/2502]  eta: 0:02:48  lr: 0.001260  min_lr: 0.001260  loss: 3.2513 (3.2188)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0011 (inf)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [193]  [2200/2502]  eta: 0:01:41  lr: 0.001259  min_lr: 0.001259  loss: 3.4851 (3.2240)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8993 (inf)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [193]  [2400/2502]  eta: 0:00:34  lr: 0.001257  min_lr: 0.001257  loss: 3.4311 (3.2257)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9477 (inf)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [193]  [2501/2502]  eta: 0:00:00  lr: 0.001256  min_lr: 0.001256  loss: 3.0578 (3.2255)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9522 (inf)  time: 0.3010  data: 0.0009  max mem: 27255
Epoch: [193] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.001256  min_lr: 0.001256  loss: 3.0578 (3.2165)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9522 (inf)
Test:  [ 0/50]  eta: 0:02:27  loss: 0.4952 (0.4952)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 2.9417  data: 2.7344  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7832 (0.8367)  acc1: 88.8000 (85.4182)  acc5: 97.6000 (97.4182)  time: 0.4501  data: 0.2822  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8361 (0.8542)  acc1: 84.0000 (84.2095)  acc5: 97.2000 (97.5429)  time: 0.2218  data: 0.0575  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9726 (0.9449)  acc1: 80.8000 (82.4516)  acc5: 95.2000 (96.1161)  time: 0.2072  data: 0.0414  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1629 (1.0071)  acc1: 76.8000 (80.7220)  acc5: 93.2000 (95.4146)  time: 0.1952  data: 0.0298  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1629 (1.0269)  acc1: 74.8000 (80.0560)  acc5: 94.0000 (95.3440)  time: 0.1932  data: 0.0297  max mem: 27255
Test: Total time: 0:00:12 (0.2570 s / it)
* Acc@1 80.212 Acc@5 95.372 loss 1.027
Accuracy of the model on the 50000 test images: 80.2%
Max accuracy: 80.21%
Epoch: [194]  [   0/2502]  eta: 1:11:25  lr: 0.001256  min_lr: 0.001256  loss: 3.1180 (3.1180)  weight_decay: 0.0500 (0.0500)  time: 1.7128  data: 1.3693  max mem: 27255
Epoch: [194]  [ 200/2502]  eta: 0:13:06  lr: 0.001254  min_lr: 0.001254  loss: 3.3823 (3.2371)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9384 (0.9229)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [194]  [ 400/2502]  eta: 0:11:50  lr: 0.001253  min_lr: 0.001253  loss: 3.3459 (3.2276)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9561 (0.9573)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [194]  [ 600/2502]  eta: 0:10:40  lr: 0.001251  min_lr: 0.001251  loss: 3.3977 (3.2304)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9720 (0.9681)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [194]  [ 800/2502]  eta: 0:09:32  lr: 0.001249  min_lr: 0.001249  loss: 3.2047 (3.2176)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9442 (0.9612)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [194]  [1000/2502]  eta: 0:08:24  lr: 0.001248  min_lr: 0.001248  loss: 3.4391 (3.2268)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9920 (0.9724)  time: 0.3344  data: 0.0003  max mem: 27255
Epoch: [194]  [1200/2502]  eta: 0:07:16  lr: 0.001246  min_lr: 0.001246  loss: 3.2025 (3.2201)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0012 (0.9735)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [194]  [1400/2502]  eta: 0:06:09  lr: 0.001244  min_lr: 0.001244  loss: 3.2689 (3.2136)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9300 (0.9721)  time: 0.3345  data: 0.0003  max mem: 27255
Epoch: [194]  [1600/2502]  eta: 0:05:02  lr: 0.001243  min_lr: 0.001243  loss: 3.3433 (3.2118)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9418 (0.9684)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [194]  [1800/2502]  eta: 0:03:55  lr: 0.001241  min_lr: 0.001241  loss: 3.3111 (3.2136)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8990 (0.9631)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [194]  [2000/2502]  eta: 0:02:48  lr: 0.001239  min_lr: 0.001239  loss: 3.3007 (3.2142)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9869 (0.9660)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [194]  [2200/2502]  eta: 0:01:41  lr: 0.001238  min_lr: 0.001238  loss: 3.4992 (3.2127)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9051 (0.9657)  time: 0.3363  data: 0.0004  max mem: 27255
Epoch: [194]  [2400/2502]  eta: 0:00:34  lr: 0.001236  min_lr: 0.001236  loss: 3.4268 (3.2154)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9748 (0.9668)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [194]  [2501/2502]  eta: 0:00:00  lr: 0.001235  min_lr: 0.001235  loss: 3.2181 (3.2182)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9725 (0.9687)  time: 0.3006  data: 0.0009  max mem: 27255
Epoch: [194] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.001235  min_lr: 0.001235  loss: 3.2181 (3.2136)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9725 (0.9687)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.5349 (0.5349)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 2.8107  data: 2.6104  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9155 (0.8818)  acc1: 85.2000 (85.0909)  acc5: 98.0000 (97.5273)  time: 0.4341  data: 0.2668  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8830 (0.8880)  acc1: 83.2000 (84.2286)  acc5: 97.2000 (97.3524)  time: 0.2348  data: 0.0708  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0704 (0.9866)  acc1: 79.2000 (82.1419)  acc5: 94.8000 (96.1806)  time: 0.2335  data: 0.0693  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2237 (1.0464)  acc1: 76.4000 (80.4390)  acc5: 92.8000 (95.4732)  time: 0.1991  data: 0.0333  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2593 (1.0653)  acc1: 76.0000 (79.9680)  acc5: 93.6000 (95.3440)  time: 0.1844  data: 0.0187  max mem: 27255
Test: Total time: 0:00:13 (0.2613 s / it)
* Acc@1 80.098 Acc@5 95.404 loss 1.063
Accuracy of the model on the 50000 test images: 80.1%
Max accuracy: 80.21%
Epoch: [195]  [   0/2502]  eta: 1:17:05  lr: 0.001235  min_lr: 0.001235  loss: 3.5846 (3.5846)  weight_decay: 0.0500 (0.0500)  time: 1.8486  data: 1.5243  max mem: 27255
Epoch: [195]  [ 200/2502]  eta: 0:13:10  lr: 0.001234  min_lr: 0.001234  loss: 3.1337 (3.1800)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9432 (0.9710)  time: 0.3365  data: 0.0005  max mem: 27255
Epoch: [195]  [ 400/2502]  eta: 0:11:52  lr: 0.001232  min_lr: 0.001232  loss: 3.0722 (3.1817)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9964 (0.9971)  time: 0.3412  data: 0.0004  max mem: 27255
Epoch: [195]  [ 600/2502]  eta: 0:10:41  lr: 0.001230  min_lr: 0.001230  loss: 3.2800 (3.1884)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9055 (0.9900)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [195]  [ 800/2502]  eta: 0:09:33  lr: 0.001229  min_lr: 0.001229  loss: 3.4229 (3.1999)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9109 (0.9902)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [195]  [1000/2502]  eta: 0:08:24  lr: 0.001227  min_lr: 0.001227  loss: 3.4183 (3.1956)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9660 (0.9988)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [195]  [1200/2502]  eta: 0:07:17  lr: 0.001225  min_lr: 0.001225  loss: 3.2781 (3.1947)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9390 (0.9923)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [195]  [1400/2502]  eta: 0:06:09  lr: 0.001224  min_lr: 0.001224  loss: 3.1010 (3.2109)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9718 (0.9936)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [195]  [1600/2502]  eta: 0:05:02  lr: 0.001222  min_lr: 0.001222  loss: 3.0799 (3.2071)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9986 (0.9982)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [195]  [1800/2502]  eta: 0:03:55  lr: 0.001220  min_lr: 0.001220  loss: 3.1959 (3.2084)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9787 (0.9957)  time: 0.3326  data: 0.0005  max mem: 27255
Epoch: [195]  [2000/2502]  eta: 0:02:48  lr: 0.001219  min_lr: 0.001219  loss: 3.4246 (3.2117)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9568 (inf)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [195]  [2200/2502]  eta: 0:01:41  lr: 0.001217  min_lr: 0.001217  loss: 3.4014 (3.2181)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9748 (inf)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [195]  [2400/2502]  eta: 0:00:34  lr: 0.001215  min_lr: 0.001215  loss: 3.3477 (3.2204)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9173 (inf)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [195]  [2501/2502]  eta: 0:00:00  lr: 0.001215  min_lr: 0.001215  loss: 3.4274 (3.2238)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0138 (inf)  time: 0.2998  data: 0.0009  max mem: 27255
Epoch: [195] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.001215  min_lr: 0.001215  loss: 3.4274 (3.2189)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0138 (inf)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.6174 (0.6174)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 2.8179  data: 2.6122  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.9702 (0.9325)  acc1: 85.2000 (84.6909)  acc5: 98.4000 (97.5636)  time: 0.4194  data: 0.2507  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9212 (0.9313)  acc1: 83.2000 (84.3048)  acc5: 98.0000 (97.6191)  time: 0.1919  data: 0.0273  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1230 (1.0400)  acc1: 80.0000 (82.0129)  acc5: 95.6000 (96.1677)  time: 0.2188  data: 0.0547  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2882 (1.0933)  acc1: 76.4000 (80.3512)  acc5: 92.8000 (95.6390)  time: 0.2136  data: 0.0498  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2607 (1.1146)  acc1: 74.8000 (79.6960)  acc5: 94.0000 (95.5680)  time: 0.1911  data: 0.0274  max mem: 27255
Test: Total time: 0:00:12 (0.2499 s / it)
* Acc@1 79.912 Acc@5 95.326 loss 1.114
Accuracy of the model on the 50000 test images: 79.9%
Max accuracy: 80.21%
Epoch: [196]  [   0/2502]  eta: 1:28:33  lr: 0.001215  min_lr: 0.001215  loss: 3.7716 (3.7716)  weight_decay: 0.0500 (0.0500)  time: 2.1237  data: 1.4244  max mem: 27255
Epoch: [196]  [ 200/2502]  eta: 0:13:09  lr: 0.001213  min_lr: 0.001213  loss: 3.1831 (3.1891)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9767 (1.0132)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [196]  [ 400/2502]  eta: 0:11:51  lr: 0.001211  min_lr: 0.001211  loss: 2.8997 (3.2096)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8991 (0.9788)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [196]  [ 600/2502]  eta: 0:10:41  lr: 0.001210  min_lr: 0.001210  loss: 3.2596 (3.2077)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9651 (0.9815)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [196]  [ 800/2502]  eta: 0:09:32  lr: 0.001208  min_lr: 0.001208  loss: 3.2981 (3.2105)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9108 (0.9872)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [196]  [1000/2502]  eta: 0:08:24  lr: 0.001206  min_lr: 0.001206  loss: 3.3914 (3.2077)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9389 (0.9766)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [196]  [1200/2502]  eta: 0:07:17  lr: 0.001205  min_lr: 0.001205  loss: 3.2650 (3.2007)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9475 (0.9768)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [196]  [1400/2502]  eta: 0:06:10  lr: 0.001203  min_lr: 0.001203  loss: 3.2808 (3.2023)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0511 (0.9812)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [196]  [1600/2502]  eta: 0:05:02  lr: 0.001201  min_lr: 0.001201  loss: 3.3214 (3.2023)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1449 (0.9961)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [196]  [1800/2502]  eta: 0:03:55  lr: 0.001200  min_lr: 0.001200  loss: 3.3530 (3.2029)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9865 (1.0042)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [196]  [2000/2502]  eta: 0:02:48  lr: 0.001198  min_lr: 0.001198  loss: 3.0205 (3.2004)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9567 (1.0050)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [196]  [2200/2502]  eta: 0:01:41  lr: 0.001197  min_lr: 0.001197  loss: 3.1072 (3.2009)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0075 (1.0036)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [196]  [2400/2502]  eta: 0:00:34  lr: 0.001195  min_lr: 0.001195  loss: 3.3203 (3.2026)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9492 (1.0041)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [196]  [2501/2502]  eta: 0:00:00  lr: 0.001194  min_lr: 0.001194  loss: 3.2901 (3.2045)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0993 (1.0068)  time: 0.3008  data: 0.0009  max mem: 27255
Epoch: [196] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.001194  min_lr: 0.001194  loss: 3.2901 (3.2065)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0993 (1.0068)
Test:  [ 0/50]  eta: 0:03:03  loss: 0.6107 (0.6107)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.6671  data: 3.4742  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.9372 (0.9378)  acc1: 84.8000 (85.0545)  acc5: 97.6000 (97.3091)  time: 0.4827  data: 0.3162  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9625 (0.9696)  acc1: 82.4000 (83.8476)  acc5: 97.6000 (97.4286)  time: 0.1643  data: 0.0004  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.1704 (1.0734)  acc1: 78.8000 (82.0387)  acc5: 95.2000 (96.0903)  time: 0.1941  data: 0.0301  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3221 (1.1355)  acc1: 77.2000 (80.5951)  acc5: 92.8000 (95.4634)  time: 0.2004  data: 0.0365  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.3119 (1.1613)  acc1: 76.4000 (79.7840)  acc5: 93.6000 (95.3680)  time: 0.2140  data: 0.0502  max mem: 27255
Test: Total time: 0:00:12 (0.2561 s / it)
* Acc@1 79.884 Acc@5 95.338 loss 1.162
Accuracy of the model on the 50000 test images: 79.9%
Max accuracy: 80.21%
Epoch: [197]  [   0/2502]  eta: 1:22:29  lr: 0.001194  min_lr: 0.001194  loss: 3.2347 (3.2347)  weight_decay: 0.0500 (0.0500)  time: 1.9782  data: 1.6282  max mem: 27255
Epoch: [197]  [ 200/2502]  eta: 0:13:08  lr: 0.001192  min_lr: 0.001192  loss: 3.2591 (3.1992)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9484 (0.9378)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [197]  [ 400/2502]  eta: 0:11:51  lr: 0.001191  min_lr: 0.001191  loss: 3.4142 (3.2267)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9397 (0.9642)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [197]  [ 600/2502]  eta: 0:10:41  lr: 0.001189  min_lr: 0.001189  loss: 3.2015 (3.2239)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8836 (0.9619)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [197]  [ 800/2502]  eta: 0:09:33  lr: 0.001188  min_lr: 0.001188  loss: 3.3900 (3.2291)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9772 (0.9740)  time: 0.3342  data: 0.0003  max mem: 27255
Epoch: [197]  [1000/2502]  eta: 0:08:25  lr: 0.001186  min_lr: 0.001186  loss: 3.3852 (3.2248)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9758 (0.9807)  time: 0.3346  data: 0.0003  max mem: 27255
Epoch: [197]  [1200/2502]  eta: 0:07:17  lr: 0.001184  min_lr: 0.001184  loss: 3.5046 (3.2122)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9507 (0.9804)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [197]  [1400/2502]  eta: 0:06:10  lr: 0.001183  min_lr: 0.001183  loss: 3.5423 (3.2152)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9812 (0.9805)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [197]  [1600/2502]  eta: 0:05:02  lr: 0.001181  min_lr: 0.001181  loss: 3.1686 (3.2168)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8666 (0.9796)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [197]  [1800/2502]  eta: 0:03:55  lr: 0.001179  min_lr: 0.001179  loss: 3.4787 (3.2124)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9994 (0.9800)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [197]  [2000/2502]  eta: 0:02:48  lr: 0.001178  min_lr: 0.001178  loss: 3.2375 (3.2119)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9546 (0.9812)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [197]  [2200/2502]  eta: 0:01:41  lr: 0.001176  min_lr: 0.001176  loss: 3.4047 (3.2149)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0383 (0.9893)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [197]  [2400/2502]  eta: 0:00:34  lr: 0.001174  min_lr: 0.001174  loss: 3.1737 (3.2056)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9444 (0.9865)  time: 0.3438  data: 0.0006  max mem: 27255
Epoch: [197]  [2501/2502]  eta: 0:00:00  lr: 0.001174  min_lr: 0.001174  loss: 3.1780 (3.2054)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0132 (0.9901)  time: 0.3001  data: 0.0009  max mem: 27255
Epoch: [197] Total time: 0:13:59 (0.3356 s / it)
Averaged stats: lr: 0.001174  min_lr: 0.001174  loss: 3.1780 (3.2032)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0132 (0.9901)
Test:  [ 0/50]  eta: 0:02:31  loss: 0.6116 (0.6116)  acc1: 93.2000 (93.2000)  acc5: 99.2000 (99.2000)  time: 3.0398  data: 2.8566  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 0.8660 (0.8848)  acc1: 85.6000 (84.8364)  acc5: 97.6000 (97.2000)  time: 0.5029  data: 0.3372  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8777 (0.8954)  acc1: 83.6000 (84.1905)  acc5: 97.2000 (97.1810)  time: 0.2195  data: 0.0556  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0279 (0.9939)  acc1: 80.0000 (82.1032)  acc5: 95.2000 (96.0000)  time: 0.2011  data: 0.0372  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2112 (1.0561)  acc1: 76.4000 (80.5171)  acc5: 93.2000 (95.5122)  time: 0.1958  data: 0.0304  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2112 (1.0709)  acc1: 75.2000 (79.9680)  acc5: 94.4000 (95.4560)  time: 0.1957  data: 0.0302  max mem: 27255
Test: Total time: 0:00:12 (0.2586 s / it)
* Acc@1 80.056 Acc@5 95.338 loss 1.075
Accuracy of the model on the 50000 test images: 80.1%
Max accuracy: 80.21%
Epoch: [198]  [   0/2502]  eta: 1:32:44  lr: 0.001174  min_lr: 0.001174  loss: 2.1961 (2.1961)  weight_decay: 0.0500 (0.0500)  time: 2.2242  data: 1.8575  max mem: 27255
Epoch: [198]  [ 200/2502]  eta: 0:13:09  lr: 0.001172  min_lr: 0.001172  loss: 3.1774 (3.1644)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9965 (1.0678)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [198]  [ 400/2502]  eta: 0:11:51  lr: 0.001170  min_lr: 0.001170  loss: 3.1669 (3.1968)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9322 (1.0219)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [198]  [ 600/2502]  eta: 0:10:41  lr: 0.001169  min_lr: 0.001169  loss: 3.1885 (3.1999)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9768 (1.0238)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [198]  [ 800/2502]  eta: 0:09:32  lr: 0.001167  min_lr: 0.001167  loss: 3.3783 (3.1989)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9448 (1.0094)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [198]  [1000/2502]  eta: 0:08:24  lr: 0.001165  min_lr: 0.001165  loss: 3.1267 (3.2023)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0361 (1.0083)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [198]  [1200/2502]  eta: 0:07:17  lr: 0.001164  min_lr: 0.001164  loss: 3.4156 (3.1904)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0107 (1.0032)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [198]  [1400/2502]  eta: 0:06:09  lr: 0.001162  min_lr: 0.001162  loss: 3.2885 (3.1920)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0124 (1.0055)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [198]  [1600/2502]  eta: 0:05:02  lr: 0.001161  min_lr: 0.001161  loss: 2.9894 (3.1943)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1069 (1.0045)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [198]  [1800/2502]  eta: 0:03:55  lr: 0.001159  min_lr: 0.001159  loss: 3.0856 (3.1967)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9322 (1.0039)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [198]  [2000/2502]  eta: 0:02:48  lr: 0.001157  min_lr: 0.001157  loss: 3.3914 (3.1983)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9535 (1.0067)  time: 0.3351  data: 0.0003  max mem: 27255
Epoch: [198]  [2200/2502]  eta: 0:01:41  lr: 0.001156  min_lr: 0.001156  loss: 3.2983 (3.1970)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9696 (1.0014)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [198]  [2400/2502]  eta: 0:00:34  lr: 0.001154  min_lr: 0.001154  loss: 3.4002 (3.1942)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1315 (1.0012)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [198]  [2501/2502]  eta: 0:00:00  lr: 0.001153  min_lr: 0.001153  loss: 3.2567 (3.1920)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9827 (1.0021)  time: 0.3004  data: 0.0009  max mem: 27255
Epoch: [198] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.001153  min_lr: 0.001153  loss: 3.2567 (3.1896)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9827 (1.0021)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.6573 (0.6573)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.1374  data: 2.9428  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.9507 (0.9358)  acc1: 85.2000 (85.5273)  acc5: 97.6000 (97.2000)  time: 0.4813  data: 0.3114  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.8774 (0.9367)  acc1: 83.2000 (84.6476)  acc5: 97.2000 (97.2381)  time: 0.2318  data: 0.0653  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1022 (1.0299)  acc1: 80.0000 (82.4387)  acc5: 94.8000 (96.0903)  time: 0.2523  data: 0.0861  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2476 (1.0825)  acc1: 76.4000 (80.8390)  acc5: 93.6000 (95.6098)  time: 0.2138  data: 0.0450  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2463 (1.1039)  acc1: 76.0000 (80.0320)  acc5: 94.4000 (95.5680)  time: 0.1682  data: 0.0001  max mem: 27255
Test: Total time: 0:00:13 (0.2729 s / it)
* Acc@1 80.288 Acc@5 95.464 loss 1.105
Accuracy of the model on the 50000 test images: 80.3%
Max accuracy: 80.29%
Epoch: [199]  [   0/2502]  eta: 1:30:12  lr: 0.001153  min_lr: 0.001153  loss: 3.8909 (3.8909)  weight_decay: 0.0500 (0.0500)  time: 2.1631  data: 1.8134  max mem: 27255
Epoch: [199]  [ 200/2502]  eta: 0:13:10  lr: 0.001152  min_lr: 0.001152  loss: 3.1340 (3.2223)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9838 (1.0818)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [199]  [ 400/2502]  eta: 0:11:51  lr: 0.001150  min_lr: 0.001150  loss: 3.3154 (3.2156)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [199]  [ 600/2502]  eta: 0:10:41  lr: 0.001148  min_lr: 0.001148  loss: 2.9691 (3.2027)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0211 (nan)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [199]  [ 800/2502]  eta: 0:09:32  lr: 0.001147  min_lr: 0.001147  loss: 3.1513 (3.2017)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9523 (nan)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [199]  [1000/2502]  eta: 0:08:25  lr: 0.001145  min_lr: 0.001145  loss: 3.3616 (3.2019)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0071 (nan)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [199]  [1200/2502]  eta: 0:07:18  lr: 0.001143  min_lr: 0.001143  loss: 3.4909 (3.2088)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9645 (nan)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [199]  [1400/2502]  eta: 0:06:10  lr: 0.001142  min_lr: 0.001142  loss: 3.2318 (3.2064)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9840 (nan)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [199]  [1600/2502]  eta: 0:05:02  lr: 0.001140  min_lr: 0.001140  loss: 3.3471 (3.2068)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9798 (nan)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [199]  [1800/2502]  eta: 0:03:55  lr: 0.001139  min_lr: 0.001139  loss: 3.1592 (3.2035)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0434 (nan)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [199]  [2000/2502]  eta: 0:02:48  lr: 0.001137  min_lr: 0.001137  loss: 3.3129 (3.2024)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9657 (nan)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [199]  [2200/2502]  eta: 0:01:41  lr: 0.001135  min_lr: 0.001135  loss: 3.4066 (3.2042)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9816 (nan)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [199]  [2400/2502]  eta: 0:00:34  lr: 0.001134  min_lr: 0.001134  loss: 3.0463 (3.2009)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0127 (nan)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [199]  [2501/2502]  eta: 0:00:00  lr: 0.001133  min_lr: 0.001133  loss: 3.3128 (3.1990)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0039 (nan)  time: 0.3016  data: 0.0009  max mem: 27255
Epoch: [199] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.001133  min_lr: 0.001133  loss: 3.3128 (3.1913)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0039 (nan)
Test:  [ 0/50]  eta: 0:02:41  loss: 0.6178 (0.6178)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 3.2367  data: 3.0404  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8762 (0.9073)  acc1: 85.2000 (85.0909)  acc5: 98.0000 (97.3455)  time: 0.4490  data: 0.2821  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8967 (0.9161)  acc1: 82.8000 (84.2476)  acc5: 97.6000 (97.2952)  time: 0.1947  data: 0.0308  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0769 (1.0047)  acc1: 80.4000 (82.3355)  acc5: 95.6000 (96.2194)  time: 0.2576  data: 0.0938  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2211 (1.0597)  acc1: 75.6000 (80.7610)  acc5: 93.6000 (95.6878)  time: 0.2502  data: 0.0865  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2211 (1.0792)  acc1: 75.6000 (80.1040)  acc5: 93.6000 (95.4560)  time: 0.1840  data: 0.0205  max mem: 27255
Test: Total time: 0:00:13 (0.2742 s / it)
* Acc@1 80.228 Acc@5 95.400 loss 1.083
Accuracy of the model on the 50000 test images: 80.2%
Max accuracy: 80.29%
Epoch: [200]  [   0/2502]  eta: 1:16:32  lr: 0.001133  min_lr: 0.001133  loss: 2.5013 (2.5013)  weight_decay: 0.0500 (0.0500)  time: 1.8354  data: 1.3237  max mem: 27255
Epoch: [200]  [ 200/2502]  eta: 0:13:10  lr: 0.001131  min_lr: 0.001131  loss: 3.1335 (3.1139)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9794 (1.0183)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [200]  [ 400/2502]  eta: 0:11:51  lr: 0.001130  min_lr: 0.001130  loss: 3.2638 (3.1169)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0298 (1.0052)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [200]  [ 600/2502]  eta: 0:10:41  lr: 0.001128  min_lr: 0.001128  loss: 3.3040 (3.1456)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0341 (1.0117)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [200]  [ 800/2502]  eta: 0:09:32  lr: 0.001126  min_lr: 0.001126  loss: 3.1469 (3.1490)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0180 (1.0169)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [200]  [1000/2502]  eta: 0:08:24  lr: 0.001125  min_lr: 0.001125  loss: 3.3231 (3.1636)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9608 (1.0140)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [200]  [1200/2502]  eta: 0:07:16  lr: 0.001123  min_lr: 0.001123  loss: 3.2868 (3.1690)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0767 (1.0244)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [200]  [1400/2502]  eta: 0:06:09  lr: 0.001122  min_lr: 0.001122  loss: 3.1206 (3.1722)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9062 (1.0221)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [200]  [1600/2502]  eta: 0:05:02  lr: 0.001120  min_lr: 0.001120  loss: 3.0949 (3.1681)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8968 (1.0147)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [200]  [1800/2502]  eta: 0:03:55  lr: 0.001118  min_lr: 0.001118  loss: 3.4529 (3.1735)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0079 (1.0160)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [200]  [2000/2502]  eta: 0:02:48  lr: 0.001117  min_lr: 0.001117  loss: 3.2141 (3.1714)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9937 (1.0158)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [200]  [2200/2502]  eta: 0:01:41  lr: 0.001115  min_lr: 0.001115  loss: 3.3100 (3.1712)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0209 (1.0147)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [200]  [2400/2502]  eta: 0:00:34  lr: 0.001114  min_lr: 0.001114  loss: 3.2668 (3.1707)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.8989 (1.0124)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [200]  [2501/2502]  eta: 0:00:00  lr: 0.001113  min_lr: 0.001113  loss: 3.5223 (3.1739)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0798 (1.0149)  time: 0.3009  data: 0.0009  max mem: 27255
Epoch: [200] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.001113  min_lr: 0.001113  loss: 3.5223 (3.1751)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0798 (1.0149)
Test:  [ 0/50]  eta: 0:02:17  loss: 0.6448 (0.6448)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 2.7588  data: 2.5665  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9645 (0.9622)  acc1: 86.8000 (85.2727)  acc5: 97.6000 (97.2727)  time: 0.4711  data: 0.3014  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.9589 (0.9774)  acc1: 83.6000 (84.4000)  acc5: 97.2000 (97.3333)  time: 0.2583  data: 0.0926  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1577 (1.0744)  acc1: 80.4000 (82.3613)  acc5: 95.2000 (96.1677)  time: 0.2437  data: 0.0799  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3380 (1.1411)  acc1: 76.8000 (80.7024)  acc5: 93.6000 (95.5415)  time: 0.1918  data: 0.0282  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2909 (1.1622)  acc1: 75.2000 (80.1440)  acc5: 94.0000 (95.4000)  time: 0.1694  data: 0.0060  max mem: 27255
Test: Total time: 0:00:13 (0.2666 s / it)
* Acc@1 80.256 Acc@5 95.362 loss 1.163
Accuracy of the model on the 50000 test images: 80.3%
Max accuracy: 80.29%
Epoch: [201]  [   0/2502]  eta: 1:19:04  lr: 0.001113  min_lr: 0.001113  loss: 3.4582 (3.4582)  weight_decay: 0.0500 (0.0500)  time: 1.8963  data: 1.3885  max mem: 27255
Epoch: [201]  [ 200/2502]  eta: 0:13:09  lr: 0.001111  min_lr: 0.001111  loss: 3.1665 (3.1894)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9731 (0.9764)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [201]  [ 400/2502]  eta: 0:11:52  lr: 0.001110  min_lr: 0.001110  loss: 3.2625 (3.1861)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9753 (1.0098)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [201]  [ 600/2502]  eta: 0:10:41  lr: 0.001108  min_lr: 0.001108  loss: 3.3815 (3.1968)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9885 (1.0252)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [201]  [ 800/2502]  eta: 0:09:32  lr: 0.001106  min_lr: 0.001106  loss: 3.2106 (3.2089)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9683 (1.0137)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [201]  [1000/2502]  eta: 0:08:24  lr: 0.001105  min_lr: 0.001105  loss: 3.3038 (3.2051)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9883 (1.0093)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [201]  [1200/2502]  eta: 0:07:16  lr: 0.001103  min_lr: 0.001103  loss: 3.4680 (3.2059)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9145 (1.0064)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [201]  [1400/2502]  eta: 0:06:09  lr: 0.001102  min_lr: 0.001102  loss: 3.1272 (3.2035)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9657 (1.0076)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [201]  [1600/2502]  eta: 0:05:02  lr: 0.001100  min_lr: 0.001100  loss: 3.4273 (3.2054)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9039 (1.0068)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [201]  [1800/2502]  eta: 0:03:55  lr: 0.001098  min_lr: 0.001098  loss: 3.2793 (3.2008)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0140 (1.0082)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [201]  [2000/2502]  eta: 0:02:48  lr: 0.001097  min_lr: 0.001097  loss: 3.1696 (3.1952)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0554 (1.0122)  time: 0.3331  data: 0.0003  max mem: 27255
Epoch: [201]  [2200/2502]  eta: 0:01:41  lr: 0.001095  min_lr: 0.001095  loss: 3.3048 (3.1951)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0193 (1.0146)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [201]  [2400/2502]  eta: 0:00:34  lr: 0.001094  min_lr: 0.001094  loss: 3.2135 (3.1954)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9519 (1.0150)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [201]  [2501/2502]  eta: 0:00:00  lr: 0.001093  min_lr: 0.001093  loss: 2.9899 (3.1915)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0843 (1.0167)  time: 0.3007  data: 0.0007  max mem: 27255
Epoch: [201] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.001093  min_lr: 0.001093  loss: 2.9899 (3.1812)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0843 (1.0167)
Test:  [ 0/50]  eta: 0:02:07  loss: 0.5051 (0.5051)  acc1: 93.6000 (93.6000)  acc5: 98.4000 (98.4000)  time: 2.5444  data: 2.3437  max mem: 27255
Test:  [10/50]  eta: 0:00:15  loss: 0.8023 (0.8178)  acc1: 86.0000 (85.4909)  acc5: 97.2000 (97.2000)  time: 0.3963  data: 0.2289  max mem: 27255
Test:  [20/50]  eta: 0:00:08  loss: 0.8210 (0.8355)  acc1: 83.6000 (84.3619)  acc5: 97.2000 (97.2952)  time: 0.1861  data: 0.0222  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0327 (0.9280)  acc1: 80.0000 (82.4129)  acc5: 95.6000 (96.1290)  time: 0.1862  data: 0.0225  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1292 (0.9793)  acc1: 77.6000 (81.0439)  acc5: 93.6000 (95.5902)  time: 0.2007  data: 0.0368  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1478 (0.9992)  acc1: 77.2000 (80.3440)  acc5: 93.6000 (95.3600)  time: 0.1928  data: 0.0278  max mem: 27255
Test: Total time: 0:00:11 (0.2373 s / it)
* Acc@1 80.440 Acc@5 95.548 loss 0.992
Accuracy of the model on the 50000 test images: 80.4%
Max accuracy: 80.44%
Epoch: [202]  [   0/2502]  eta: 1:14:28  lr: 0.001093  min_lr: 0.001093  loss: 2.8601 (2.8601)  weight_decay: 0.0500 (0.0500)  time: 1.7862  data: 1.4424  max mem: 27255
Epoch: [202]  [ 200/2502]  eta: 0:13:08  lr: 0.001091  min_lr: 0.001091  loss: 3.3321 (3.1663)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9291 (0.9860)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [202]  [ 400/2502]  eta: 0:11:52  lr: 0.001090  min_lr: 0.001090  loss: 3.4286 (3.1670)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9866 (0.9973)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [202]  [ 600/2502]  eta: 0:10:41  lr: 0.001088  min_lr: 0.001088  loss: 3.3459 (3.1749)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0013 (1.0148)  time: 0.3379  data: 0.0005  max mem: 27255
Epoch: [202]  [ 800/2502]  eta: 0:09:32  lr: 0.001086  min_lr: 0.001086  loss: 3.1634 (3.1682)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9955 (1.0133)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [202]  [1000/2502]  eta: 0:08:25  lr: 0.001085  min_lr: 0.001085  loss: 3.3742 (3.1778)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0069 (1.0252)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [202]  [1200/2502]  eta: 0:07:17  lr: 0.001083  min_lr: 0.001083  loss: 3.4428 (3.1859)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0572 (1.0366)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [202]  [1400/2502]  eta: 0:06:09  lr: 0.001082  min_lr: 0.001082  loss: 3.1621 (3.1793)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9537 (1.0304)  time: 0.3390  data: 0.0004  max mem: 27255
Epoch: [202]  [1600/2502]  eta: 0:05:02  lr: 0.001080  min_lr: 0.001080  loss: 3.2418 (3.1846)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9997 (1.0258)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [202]  [1800/2502]  eta: 0:03:55  lr: 0.001078  min_lr: 0.001078  loss: 3.1912 (3.1771)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9904 (1.0218)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [202]  [2000/2502]  eta: 0:02:48  lr: 0.001077  min_lr: 0.001077  loss: 3.3488 (3.1877)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9941 (1.0247)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [202]  [2200/2502]  eta: 0:01:41  lr: 0.001075  min_lr: 0.001075  loss: 3.2538 (3.1918)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0126 (1.0279)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [202]  [2400/2502]  eta: 0:00:34  lr: 0.001074  min_lr: 0.001074  loss: 3.2275 (3.1867)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0634 (1.0307)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [202]  [2501/2502]  eta: 0:00:00  lr: 0.001073  min_lr: 0.001073  loss: 3.3486 (3.1843)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0280 (1.0302)  time: 0.3007  data: 0.0009  max mem: 27255
Epoch: [202] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.001073  min_lr: 0.001073  loss: 3.3486 (3.1806)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0280 (1.0302)
Test:  [ 0/50]  eta: 0:03:07  loss: 0.5518 (0.5518)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 3.7421  data: 3.5471  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 0.8757 (0.8827)  acc1: 85.6000 (85.2727)  acc5: 97.2000 (97.2364)  time: 0.5087  data: 0.3415  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.8808 (0.8963)  acc1: 84.0000 (84.4762)  acc5: 97.2000 (97.4667)  time: 0.2310  data: 0.0669  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0342 (1.0019)  acc1: 80.8000 (82.5032)  acc5: 96.0000 (96.2968)  time: 0.2497  data: 0.0859  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2559 (1.0641)  acc1: 76.4000 (80.8683)  acc5: 93.6000 (95.7561)  time: 0.1932  data: 0.0295  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2559 (1.0878)  acc1: 75.2000 (80.1840)  acc5: 94.4000 (95.6720)  time: 0.1929  data: 0.0293  max mem: 27255
Test: Total time: 0:00:13 (0.2758 s / it)
* Acc@1 80.318 Acc@5 95.614 loss 1.091
Accuracy of the model on the 50000 test images: 80.3%
Max accuracy: 80.44%
Epoch: [203]  [   0/2502]  eta: 1:31:21  lr: 0.001073  min_lr: 0.001073  loss: 3.5199 (3.5199)  weight_decay: 0.0500 (0.0500)  time: 2.1908  data: 1.4627  max mem: 27255
Epoch: [203]  [ 200/2502]  eta: 0:13:11  lr: 0.001071  min_lr: 0.001071  loss: 3.0975 (3.1608)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9859 (1.0277)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [203]  [ 400/2502]  eta: 0:11:52  lr: 0.001070  min_lr: 0.001070  loss: 2.9589 (3.1721)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0481 (1.0309)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [203]  [ 600/2502]  eta: 0:10:42  lr: 0.001068  min_lr: 0.001068  loss: 3.2243 (3.1778)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9295 (1.0302)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [203]  [ 800/2502]  eta: 0:09:33  lr: 0.001066  min_lr: 0.001066  loss: 3.3132 (3.1639)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0837 (1.0347)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [203]  [1000/2502]  eta: 0:08:24  lr: 0.001065  min_lr: 0.001065  loss: 3.3828 (3.1676)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0574 (1.0410)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [203]  [1200/2502]  eta: 0:07:17  lr: 0.001063  min_lr: 0.001063  loss: 3.4654 (3.1809)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0140 (1.0387)  time: 0.3423  data: 0.0003  max mem: 27255
Epoch: [203]  [1400/2502]  eta: 0:06:09  lr: 0.001062  min_lr: 0.001062  loss: 3.0467 (3.1792)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1236 (1.0458)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [203]  [1600/2502]  eta: 0:05:02  lr: 0.001060  min_lr: 0.001060  loss: 3.2717 (3.1767)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1357 (1.0510)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [203]  [1800/2502]  eta: 0:03:55  lr: 0.001059  min_lr: 0.001059  loss: 3.3173 (3.1772)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0921 (1.0558)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [203]  [2000/2502]  eta: 0:02:48  lr: 0.001057  min_lr: 0.001057  loss: 3.1919 (3.1805)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9780 (1.0589)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [203]  [2200/2502]  eta: 0:01:41  lr: 0.001055  min_lr: 0.001055  loss: 3.3655 (3.1842)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9763 (1.0537)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [203]  [2400/2502]  eta: 0:00:34  lr: 0.001054  min_lr: 0.001054  loss: 3.4991 (3.1829)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0332 (1.0548)  time: 0.3341  data: 0.0003  max mem: 27255
Epoch: [203]  [2501/2502]  eta: 0:00:00  lr: 0.001053  min_lr: 0.001053  loss: 3.2822 (3.1817)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0042 (1.0556)  time: 0.2999  data: 0.0007  max mem: 27255
Epoch: [203] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.001053  min_lr: 0.001053  loss: 3.2822 (3.1707)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0042 (1.0556)
Test:  [ 0/50]  eta: 0:02:07  loss: 0.5359 (0.5359)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 2.5528  data: 2.3508  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8363 (0.8666)  acc1: 86.8000 (85.6727)  acc5: 97.2000 (97.3818)  time: 0.4123  data: 0.2448  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9131 (0.9014)  acc1: 82.8000 (84.4952)  acc5: 97.2000 (97.4095)  time: 0.2393  data: 0.0754  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0801 (1.0045)  acc1: 80.0000 (82.4903)  acc5: 95.2000 (96.1936)  time: 0.2305  data: 0.0666  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2299 (1.0620)  acc1: 77.6000 (80.9268)  acc5: 94.0000 (95.7073)  time: 0.1799  data: 0.0161  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2299 (1.0810)  acc1: 75.2000 (80.1920)  acc5: 94.0000 (95.6240)  time: 0.1716  data: 0.0078  max mem: 27255
Test: Total time: 0:00:12 (0.2507 s / it)
* Acc@1 80.548 Acc@5 95.590 loss 1.081
Accuracy of the model on the 50000 test images: 80.5%
Max accuracy: 80.55%
Epoch: [204]  [   0/2502]  eta: 1:30:49  lr: 0.001053  min_lr: 0.001053  loss: 3.0875 (3.0875)  weight_decay: 0.0500 (0.0500)  time: 2.1782  data: 1.8308  max mem: 27255
Epoch: [204]  [ 200/2502]  eta: 0:13:12  lr: 0.001051  min_lr: 0.001051  loss: 3.2443 (3.1038)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0439 (1.0440)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [204]  [ 400/2502]  eta: 0:11:53  lr: 0.001050  min_lr: 0.001050  loss: 3.1904 (3.1056)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2033 (1.0779)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [204]  [ 600/2502]  eta: 0:10:41  lr: 0.001048  min_lr: 0.001048  loss: 3.0790 (3.1249)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9843 (1.0630)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [204]  [ 800/2502]  eta: 0:09:33  lr: 0.001047  min_lr: 0.001047  loss: 3.0603 (3.1142)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9737 (1.0492)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [204]  [1000/2502]  eta: 0:08:25  lr: 0.001045  min_lr: 0.001045  loss: 3.3398 (3.1210)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9639 (1.0456)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [204]  [1200/2502]  eta: 0:07:17  lr: 0.001044  min_lr: 0.001044  loss: 3.2005 (3.1319)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9434 (1.0414)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [204]  [1400/2502]  eta: 0:06:09  lr: 0.001042  min_lr: 0.001042  loss: 3.1617 (3.1350)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2154 (1.0493)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [204]  [1600/2502]  eta: 0:05:02  lr: 0.001040  min_lr: 0.001040  loss: 3.0929 (3.1370)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9795 (1.0440)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [204]  [1800/2502]  eta: 0:03:55  lr: 0.001039  min_lr: 0.001039  loss: 3.1720 (3.1350)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9510 (1.0444)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [204]  [2000/2502]  eta: 0:02:48  lr: 0.001037  min_lr: 0.001037  loss: 3.2007 (3.1324)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9570 (1.0406)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [204]  [2200/2502]  eta: 0:01:41  lr: 0.001036  min_lr: 0.001036  loss: 3.3410 (3.1359)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9859 (1.0359)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [204]  [2400/2502]  eta: 0:00:34  lr: 0.001034  min_lr: 0.001034  loss: 3.4616 (3.1390)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0105 (1.0379)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [204]  [2501/2502]  eta: 0:00:00  lr: 0.001033  min_lr: 0.001033  loss: 3.3803 (3.1396)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0158 (1.0389)  time: 0.3005  data: 0.0009  max mem: 27255
Epoch: [204] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.001033  min_lr: 0.001033  loss: 3.3803 (3.1560)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0158 (1.0389)
Test:  [ 0/50]  eta: 0:02:25  loss: 0.6525 (0.6525)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 2.9012  data: 2.7116  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9041 (0.9387)  acc1: 84.8000 (85.7091)  acc5: 97.6000 (97.4546)  time: 0.4369  data: 0.2706  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9579 (0.9540)  acc1: 82.0000 (84.4952)  acc5: 97.6000 (97.5619)  time: 0.2340  data: 0.0699  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0802 (1.0354)  acc1: 80.8000 (82.7226)  acc5: 95.6000 (96.4258)  time: 0.2663  data: 0.1023  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 1.2403 (1.0842)  acc1: 76.4000 (81.2585)  acc5: 93.6000 (95.8732)  time: 0.2370  data: 0.0721  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2403 (1.1042)  acc1: 76.0000 (80.7040)  acc5: 93.6000 (95.6800)  time: 0.1913  data: 0.0266  max mem: 27255
Test: Total time: 0:00:13 (0.2774 s / it)
* Acc@1 80.550 Acc@5 95.610 loss 1.108
Accuracy of the model on the 50000 test images: 80.6%
Max accuracy: 80.55%
Epoch: [205]  [   0/2502]  eta: 1:16:58  lr: 0.001033  min_lr: 0.001033  loss: 2.4333 (2.4333)  weight_decay: 0.0500 (0.0500)  time: 1.8458  data: 1.4985  max mem: 27255
Epoch: [205]  [ 200/2502]  eta: 0:13:07  lr: 0.001032  min_lr: 0.001032  loss: 3.3531 (3.1789)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0466 (1.0289)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [205]  [ 400/2502]  eta: 0:11:52  lr: 0.001030  min_lr: 0.001030  loss: 3.1560 (3.1680)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0474 (1.0552)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [205]  [ 600/2502]  eta: 0:10:41  lr: 0.001029  min_lr: 0.001029  loss: 3.2000 (3.1676)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1284 (1.0661)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [205]  [ 800/2502]  eta: 0:09:33  lr: 0.001027  min_lr: 0.001027  loss: 3.4149 (3.1704)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0193 (1.0568)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [205]  [1000/2502]  eta: 0:08:25  lr: 0.001025  min_lr: 0.001025  loss: 3.1867 (3.1715)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0683 (1.0583)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [205]  [1200/2502]  eta: 0:07:17  lr: 0.001024  min_lr: 0.001024  loss: 3.1105 (3.1649)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9948 (1.0548)  time: 0.3436  data: 0.0004  max mem: 27255
Epoch: [205]  [1400/2502]  eta: 0:06:09  lr: 0.001022  min_lr: 0.001022  loss: 3.1234 (3.1557)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0670 (1.0554)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [205]  [1600/2502]  eta: 0:05:02  lr: 0.001021  min_lr: 0.001021  loss: 3.1873 (3.1562)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [205]  [1800/2502]  eta: 0:03:55  lr: 0.001019  min_lr: 0.001019  loss: 3.1073 (3.1548)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9657 (nan)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [205]  [2000/2502]  eta: 0:02:48  lr: 0.001018  min_lr: 0.001018  loss: 3.0660 (3.1594)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9816 (nan)  time: 0.3386  data: 0.0004  max mem: 27255
Epoch: [205]  [2200/2502]  eta: 0:01:41  lr: 0.001016  min_lr: 0.001016  loss: 3.1538 (3.1532)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0164 (nan)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [205]  [2400/2502]  eta: 0:00:34  lr: 0.001015  min_lr: 0.001015  loss: 3.2807 (3.1537)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0713 (nan)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [205]  [2501/2502]  eta: 0:00:00  lr: 0.001014  min_lr: 0.001014  loss: 3.1222 (3.1543)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9583 (nan)  time: 0.3014  data: 0.0009  max mem: 27255
Epoch: [205] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.001014  min_lr: 0.001014  loss: 3.1222 (3.1614)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9583 (nan)
Test:  [ 0/50]  eta: 0:02:15  loss: 0.5605 (0.5605)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.7142  data: 2.5175  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8906 (0.8991)  acc1: 87.2000 (85.8546)  acc5: 98.0000 (97.6000)  time: 0.4169  data: 0.2489  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9253 (0.9169)  acc1: 82.4000 (84.5905)  acc5: 98.0000 (97.6762)  time: 0.1798  data: 0.0147  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0264 (1.0130)  acc1: 81.2000 (82.6581)  acc5: 96.4000 (96.4258)  time: 0.1863  data: 0.0218  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2691 (1.0680)  acc1: 76.4000 (81.0927)  acc5: 93.2000 (95.8634)  time: 0.2014  data: 0.0375  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2016 (1.0863)  acc1: 76.4000 (80.4320)  acc5: 94.4000 (95.7360)  time: 0.1860  data: 0.0223  max mem: 27255
Test: Total time: 0:00:11 (0.2395 s / it)
* Acc@1 80.570 Acc@5 95.630 loss 1.083
Accuracy of the model on the 50000 test images: 80.6%
Max accuracy: 80.57%
Epoch: [206]  [   0/2502]  eta: 1:38:05  lr: 0.001014  min_lr: 0.001014  loss: 3.4732 (3.4732)  weight_decay: 0.0500 (0.0500)  time: 2.3524  data: 2.0143  max mem: 27255
Epoch: [206]  [ 200/2502]  eta: 0:13:13  lr: 0.001012  min_lr: 0.001012  loss: 3.5090 (3.1696)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9874 (1.0322)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [206]  [ 400/2502]  eta: 0:11:53  lr: 0.001011  min_lr: 0.001011  loss: 3.2178 (3.1616)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9959 (1.0416)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [206]  [ 600/2502]  eta: 0:10:42  lr: 0.001009  min_lr: 0.001009  loss: 3.4713 (3.1621)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0220 (1.0679)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [206]  [ 800/2502]  eta: 0:09:33  lr: 0.001007  min_lr: 0.001007  loss: 3.3310 (3.1508)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0435 (1.0639)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [206]  [1000/2502]  eta: 0:08:25  lr: 0.001006  min_lr: 0.001006  loss: 2.7928 (3.1351)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9811 (1.0608)  time: 0.3422  data: 0.0004  max mem: 27255
Epoch: [206]  [1200/2502]  eta: 0:07:17  lr: 0.001004  min_lr: 0.001004  loss: 3.0731 (3.1447)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0404 (1.0631)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [206]  [1400/2502]  eta: 0:06:10  lr: 0.001003  min_lr: 0.001003  loss: 3.1886 (3.1437)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1005 (1.0641)  time: 0.3360  data: 0.0004  max mem: 27255
Epoch: [206]  [1600/2502]  eta: 0:05:02  lr: 0.001001  min_lr: 0.001001  loss: 3.4081 (3.1419)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0507 (1.0703)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [206]  [1800/2502]  eta: 0:03:55  lr: 0.001000  min_lr: 0.001000  loss: 3.3717 (3.1435)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9183 (1.0630)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [206]  [2000/2502]  eta: 0:02:48  lr: 0.000998  min_lr: 0.000998  loss: 3.4369 (3.1424)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0819 (1.0602)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [206]  [2200/2502]  eta: 0:01:41  lr: 0.000997  min_lr: 0.000997  loss: 2.8316 (3.1403)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0438 (1.0620)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [206]  [2400/2502]  eta: 0:00:34  lr: 0.000995  min_lr: 0.000995  loss: 3.3055 (3.1425)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9875 (1.0614)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [206]  [2501/2502]  eta: 0:00:00  lr: 0.000994  min_lr: 0.000994  loss: 3.1024 (3.1416)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0597 (1.0606)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [206] Total time: 0:13:59 (0.3353 s / it)
Averaged stats: lr: 0.000994  min_lr: 0.000994  loss: 3.1024 (3.1457)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0597 (1.0606)
Test:  [ 0/50]  eta: 0:02:23  loss: 0.6279 (0.6279)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 2.8787  data: 2.6762  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.7939 (0.8523)  acc1: 86.0000 (85.5273)  acc5: 98.0000 (97.3091)  time: 0.4157  data: 0.2482  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8383 (0.8612)  acc1: 83.2000 (84.5143)  acc5: 98.0000 (97.5048)  time: 0.1715  data: 0.0075  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0367 (0.9523)  acc1: 80.0000 (82.7097)  acc5: 96.0000 (96.4645)  time: 0.1960  data: 0.0320  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1922 (1.0083)  acc1: 78.4000 (81.1707)  acc5: 94.0000 (95.8927)  time: 0.2267  data: 0.0628  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1922 (1.0274)  acc1: 77.2000 (80.5920)  acc5: 94.4000 (95.7520)  time: 0.2010  data: 0.0373  max mem: 27255
Test: Total time: 0:00:12 (0.2489 s / it)
* Acc@1 80.658 Acc@5 95.674 loss 1.028
Accuracy of the model on the 50000 test images: 80.7%
Max accuracy: 80.66%
Epoch: [207]  [   0/2502]  eta: 1:34:31  lr: 0.000994  min_lr: 0.000994  loss: 2.4773 (2.4773)  weight_decay: 0.0500 (0.0500)  time: 2.2666  data: 1.9209  max mem: 27255
Epoch: [207]  [ 200/2502]  eta: 0:13:10  lr: 0.000993  min_lr: 0.000993  loss: 3.3020 (3.0970)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0930 (1.1234)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [207]  [ 400/2502]  eta: 0:11:50  lr: 0.000991  min_lr: 0.000991  loss: 3.3179 (3.1356)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0158 (1.1046)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [207]  [ 600/2502]  eta: 0:10:40  lr: 0.000990  min_lr: 0.000990  loss: 3.3214 (3.1243)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9789 (1.0786)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [207]  [ 800/2502]  eta: 0:09:31  lr: 0.000988  min_lr: 0.000988  loss: 3.1098 (3.1497)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1828 (1.0869)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [207]  [1000/2502]  eta: 0:08:24  lr: 0.000987  min_lr: 0.000987  loss: 3.1755 (3.1530)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9988 (1.0782)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [207]  [1200/2502]  eta: 0:07:16  lr: 0.000985  min_lr: 0.000985  loss: 3.3196 (3.1500)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0763 (1.0781)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [207]  [1400/2502]  eta: 0:06:09  lr: 0.000983  min_lr: 0.000983  loss: 3.3742 (3.1461)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0351 (1.0707)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [207]  [1600/2502]  eta: 0:05:02  lr: 0.000982  min_lr: 0.000982  loss: 3.3609 (3.1487)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2102 (1.0773)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [207]  [1800/2502]  eta: 0:03:55  lr: 0.000980  min_lr: 0.000980  loss: 3.2397 (3.1502)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0644 (1.0749)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [207]  [2000/2502]  eta: 0:02:48  lr: 0.000979  min_lr: 0.000979  loss: 3.3697 (3.1472)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0122 (1.0774)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [207]  [2200/2502]  eta: 0:01:41  lr: 0.000977  min_lr: 0.000977  loss: 2.9960 (3.1469)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0739 (1.0764)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [207]  [2400/2502]  eta: 0:00:34  lr: 0.000976  min_lr: 0.000976  loss: 3.3691 (3.1472)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9859 (1.0736)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [207]  [2501/2502]  eta: 0:00:00  lr: 0.000975  min_lr: 0.000975  loss: 3.1518 (3.1492)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9997 (1.0717)  time: 0.3016  data: 0.0010  max mem: 27255
Epoch: [207] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.000975  min_lr: 0.000975  loss: 3.1518 (3.1483)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9997 (1.0717)
Test:  [ 0/50]  eta: 0:02:29  loss: 0.5866 (0.5866)  acc1: 94.0000 (94.0000)  acc5: 98.4000 (98.4000)  time: 2.9860  data: 2.7936  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8138 (0.8470)  acc1: 86.0000 (85.6727)  acc5: 98.0000 (97.5273)  time: 0.4315  data: 0.2651  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.8618 (0.8593)  acc1: 84.4000 (84.8000)  acc5: 97.2000 (97.5429)  time: 0.2430  data: 0.0773  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9964 (0.9553)  acc1: 80.4000 (82.7742)  acc5: 96.0000 (96.3097)  time: 0.3047  data: 0.1380  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 1.1772 (1.0080)  acc1: 78.0000 (81.3268)  acc5: 94.0000 (95.8244)  time: 0.2544  data: 0.0898  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1768 (1.0301)  acc1: 76.4000 (80.6640)  acc5: 94.4000 (95.7520)  time: 0.2363  data: 0.0727  max mem: 27255
Test: Total time: 0:00:14 (0.2936 s / it)
* Acc@1 80.706 Acc@5 95.722 loss 1.027
Accuracy of the model on the 50000 test images: 80.7%
Max accuracy: 80.71%
Epoch: [208]  [   0/2502]  eta: 1:13:34  lr: 0.000975  min_lr: 0.000975  loss: 3.5279 (3.5279)  weight_decay: 0.0500 (0.0500)  time: 1.7645  data: 1.4293  max mem: 27255
Epoch: [208]  [ 200/2502]  eta: 0:13:03  lr: 0.000973  min_lr: 0.000973  loss: 3.3049 (3.1594)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9954 (1.0503)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [208]  [ 400/2502]  eta: 0:11:49  lr: 0.000972  min_lr: 0.000972  loss: 3.0154 (3.1711)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0691 (1.0500)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [208]  [ 600/2502]  eta: 0:10:39  lr: 0.000970  min_lr: 0.000970  loss: 3.3806 (3.1754)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1092 (1.0664)  time: 0.3326  data: 0.0003  max mem: 27255
Epoch: [208]  [ 800/2502]  eta: 0:09:30  lr: 0.000969  min_lr: 0.000969  loss: 2.8319 (3.1560)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0777 (1.0712)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [208]  [1000/2502]  eta: 0:08:23  lr: 0.000967  min_lr: 0.000967  loss: 3.2550 (3.1510)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1022 (1.0728)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [208]  [1200/2502]  eta: 0:07:15  lr: 0.000966  min_lr: 0.000966  loss: 3.2118 (3.1509)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0747 (1.0790)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [208]  [1400/2502]  eta: 0:06:08  lr: 0.000964  min_lr: 0.000964  loss: 3.3286 (3.1484)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0498 (1.0780)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [208]  [1600/2502]  eta: 0:05:01  lr: 0.000963  min_lr: 0.000963  loss: 3.0272 (3.1516)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0320 (1.0757)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [208]  [1800/2502]  eta: 0:03:54  lr: 0.000961  min_lr: 0.000961  loss: 3.1319 (3.1450)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0275 (1.0696)  time: 0.3325  data: 0.0004  max mem: 27255
Epoch: [208]  [2000/2502]  eta: 0:02:47  lr: 0.000960  min_lr: 0.000960  loss: 3.2910 (3.1403)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9611 (1.0665)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [208]  [2200/2502]  eta: 0:01:40  lr: 0.000958  min_lr: 0.000958  loss: 3.4280 (3.1422)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9993 (1.0626)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [208]  [2400/2502]  eta: 0:00:34  lr: 0.000957  min_lr: 0.000957  loss: 3.0445 (3.1444)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0074 (1.0639)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [208]  [2501/2502]  eta: 0:00:00  lr: 0.000956  min_lr: 0.000956  loss: 3.2182 (3.1436)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0739 (nan)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [208] Total time: 0:13:55 (0.3340 s / it)
Averaged stats: lr: 0.000956  min_lr: 0.000956  loss: 3.2182 (3.1503)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0739 (nan)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.5100 (0.5100)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 3.0029  data: 2.8122  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8060 (0.8285)  acc1: 86.0000 (85.8182)  acc5: 97.2000 (97.3818)  time: 0.4260  data: 0.2596  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8088 (0.8403)  acc1: 83.6000 (85.1238)  acc5: 97.2000 (97.4286)  time: 0.2122  data: 0.0478  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9734 (0.9398)  acc1: 80.8000 (83.0194)  acc5: 95.6000 (96.1806)  time: 0.2251  data: 0.0604  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1373 (0.9880)  acc1: 77.6000 (81.5902)  acc5: 93.6000 (95.6488)  time: 0.2013  data: 0.0367  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1204 (1.0053)  acc1: 76.4000 (80.8640)  acc5: 94.0000 (95.5680)  time: 0.1873  data: 0.0220  max mem: 27255
Test: Total time: 0:00:12 (0.2572 s / it)
* Acc@1 80.704 Acc@5 95.634 loss 1.002
Accuracy of the model on the 50000 test images: 80.7%
Max accuracy: 80.71%
Epoch: [209]  [   0/2502]  eta: 1:29:16  lr: 0.000956  min_lr: 0.000956  loss: 3.4837 (3.4837)  weight_decay: 0.0500 (0.0500)  time: 2.1410  data: 1.6332  max mem: 27255
Epoch: [209]  [ 200/2502]  eta: 0:13:12  lr: 0.000954  min_lr: 0.000954  loss: 3.2931 (3.1409)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0565 (1.0920)  time: 0.3406  data: 0.0004  max mem: 27255
Epoch: [209]  [ 400/2502]  eta: 0:11:52  lr: 0.000953  min_lr: 0.000953  loss: 3.0338 (3.1304)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0288 (1.0724)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [209]  [ 600/2502]  eta: 0:10:41  lr: 0.000951  min_lr: 0.000951  loss: 3.1487 (3.1238)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0986 (1.0949)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [209]  [ 800/2502]  eta: 0:09:32  lr: 0.000950  min_lr: 0.000950  loss: 3.0844 (3.1081)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0073 (1.0873)  time: 0.3335  data: 0.0003  max mem: 27255
Epoch: [209]  [1000/2502]  eta: 0:08:24  lr: 0.000948  min_lr: 0.000948  loss: 3.2369 (3.1059)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9657 (1.0867)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [209]  [1200/2502]  eta: 0:07:17  lr: 0.000947  min_lr: 0.000947  loss: 3.1097 (3.1119)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0060 (1.0815)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [209]  [1400/2502]  eta: 0:06:09  lr: 0.000945  min_lr: 0.000945  loss: 3.2461 (3.1113)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0340 (1.0821)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [209]  [1600/2502]  eta: 0:05:02  lr: 0.000944  min_lr: 0.000944  loss: 3.3825 (3.1193)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0602 (1.0876)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [209]  [1800/2502]  eta: 0:03:55  lr: 0.000942  min_lr: 0.000942  loss: 3.3360 (3.1186)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0486 (1.0913)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [209]  [2000/2502]  eta: 0:02:48  lr: 0.000941  min_lr: 0.000941  loss: 3.4829 (3.1201)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0119 (1.0896)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [209]  [2200/2502]  eta: 0:01:41  lr: 0.000939  min_lr: 0.000939  loss: 3.1832 (3.1266)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0506 (1.0879)  time: 0.3441  data: 0.0005  max mem: 27255
Epoch: [209]  [2400/2502]  eta: 0:00:34  lr: 0.000937  min_lr: 0.000937  loss: 3.2976 (3.1262)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1051 (1.0912)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [209]  [2501/2502]  eta: 0:00:00  lr: 0.000937  min_lr: 0.000937  loss: 2.9343 (3.1262)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0992 (1.0924)  time: 0.2997  data: 0.0008  max mem: 27255
Epoch: [209] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000937  min_lr: 0.000937  loss: 2.9343 (3.1339)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0992 (1.0924)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.5734 (0.5734)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 2.6590  data: 2.4745  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8541 (0.8578)  acc1: 88.4000 (85.8909)  acc5: 98.0000 (97.5273)  time: 0.4362  data: 0.2703  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8703 (0.8796)  acc1: 84.0000 (85.0476)  acc5: 97.6000 (97.4857)  time: 0.1957  data: 0.0317  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0035 (0.9759)  acc1: 81.2000 (83.0323)  acc5: 96.0000 (96.2194)  time: 0.2002  data: 0.0356  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1948 (1.0304)  acc1: 76.4000 (81.3951)  acc5: 93.6000 (95.6781)  time: 0.2003  data: 0.0359  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2092 (1.0464)  acc1: 76.0000 (80.7040)  acc5: 94.4000 (95.6000)  time: 0.1756  data: 0.0120  max mem: 27255
Test: Total time: 0:00:12 (0.2447 s / it)
* Acc@1 80.754 Acc@5 95.746 loss 1.047
Accuracy of the model on the 50000 test images: 80.8%
Max accuracy: 80.75%
Epoch: [210]  [   0/2502]  eta: 1:33:05  lr: 0.000937  min_lr: 0.000937  loss: 3.1756 (3.1756)  weight_decay: 0.0500 (0.0500)  time: 2.2324  data: 1.8759  max mem: 27255
Epoch: [210]  [ 200/2502]  eta: 0:13:12  lr: 0.000935  min_lr: 0.000935  loss: 3.2753 (3.1378)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0698 (1.1163)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [210]  [ 400/2502]  eta: 0:11:53  lr: 0.000934  min_lr: 0.000934  loss: 3.4238 (3.1252)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [210]  [ 600/2502]  eta: 0:10:41  lr: 0.000932  min_lr: 0.000932  loss: 3.1559 (3.1180)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0763 (nan)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [210]  [ 800/2502]  eta: 0:09:32  lr: 0.000931  min_lr: 0.000931  loss: 3.3657 (3.1319)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0381 (nan)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [210]  [1000/2502]  eta: 0:08:24  lr: 0.000929  min_lr: 0.000929  loss: 3.1559 (3.1171)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9807 (nan)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [210]  [1200/2502]  eta: 0:07:17  lr: 0.000928  min_lr: 0.000928  loss: 3.1187 (3.1270)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0361 (nan)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [210]  [1400/2502]  eta: 0:06:09  lr: 0.000926  min_lr: 0.000926  loss: 2.8335 (3.1210)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0683 (nan)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [210]  [1600/2502]  eta: 0:05:02  lr: 0.000925  min_lr: 0.000925  loss: 3.0251 (3.1202)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0625 (nan)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [210]  [1800/2502]  eta: 0:03:55  lr: 0.000923  min_lr: 0.000923  loss: 3.2477 (3.1243)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0455 (nan)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [210]  [2000/2502]  eta: 0:02:48  lr: 0.000922  min_lr: 0.000922  loss: 3.2195 (3.1257)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1296 (nan)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [210]  [2200/2502]  eta: 0:01:41  lr: 0.000920  min_lr: 0.000920  loss: 3.3736 (3.1289)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0341 (nan)  time: 0.3391  data: 0.0005  max mem: 27255
Epoch: [210]  [2400/2502]  eta: 0:00:34  lr: 0.000919  min_lr: 0.000919  loss: 3.4225 (3.1320)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0083 (nan)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [210]  [2501/2502]  eta: 0:00:00  lr: 0.000918  min_lr: 0.000918  loss: 3.3967 (3.1358)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0224 (nan)  time: 0.3077  data: 0.0008  max mem: 27255
Epoch: [210] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000918  min_lr: 0.000918  loss: 3.3967 (3.1336)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0224 (nan)
Test:  [ 0/50]  eta: 0:02:19  loss: 0.6807 (0.6807)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 2.7991  data: 2.6036  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9706 (0.9900)  acc1: 87.6000 (85.7091)  acc5: 97.2000 (97.1636)  time: 0.4737  data: 0.3071  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9706 (0.9909)  acc1: 83.2000 (84.7429)  acc5: 97.6000 (97.5810)  time: 0.2305  data: 0.0648  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1135 (1.0906)  acc1: 80.8000 (82.7742)  acc5: 96.8000 (96.1806)  time: 0.2082  data: 0.0407  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3213 (1.1433)  acc1: 75.6000 (81.2585)  acc5: 93.2000 (95.6390)  time: 0.1928  data: 0.0273  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2805 (1.1642)  acc1: 75.6000 (80.6080)  acc5: 94.4000 (95.6720)  time: 0.1772  data: 0.0136  max mem: 27255
Test: Total time: 0:00:12 (0.2571 s / it)
* Acc@1 80.602 Acc@5 95.748 loss 1.162
Accuracy of the model on the 50000 test images: 80.6%
Max accuracy: 80.75%
Epoch: [211]  [   0/2502]  eta: 1:32:00  lr: 0.000918  min_lr: 0.000918  loss: 3.0584 (3.0584)  weight_decay: 0.0500 (0.0500)  time: 2.2064  data: 1.6916  max mem: 27255
Epoch: [211]  [ 200/2502]  eta: 0:13:10  lr: 0.000916  min_lr: 0.000916  loss: 3.2914 (3.0854)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0451 (1.1464)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [211]  [ 400/2502]  eta: 0:11:51  lr: 0.000915  min_lr: 0.000915  loss: 3.0810 (3.0859)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0235 (1.0998)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [211]  [ 600/2502]  eta: 0:10:41  lr: 0.000913  min_lr: 0.000913  loss: 3.2375 (3.0961)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0596 (1.0902)  time: 0.3409  data: 0.0005  max mem: 27255
Epoch: [211]  [ 800/2502]  eta: 0:09:32  lr: 0.000912  min_lr: 0.000912  loss: 3.2310 (3.1099)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0446 (1.0813)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [211]  [1000/2502]  eta: 0:08:24  lr: 0.000910  min_lr: 0.000910  loss: 3.1583 (3.1154)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0945 (1.0815)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [211]  [1200/2502]  eta: 0:07:17  lr: 0.000909  min_lr: 0.000909  loss: 2.8852 (3.1076)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0554 (1.0797)  time: 0.3414  data: 0.0005  max mem: 27255
Epoch: [211]  [1400/2502]  eta: 0:06:09  lr: 0.000907  min_lr: 0.000907  loss: 3.1001 (3.1106)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0048 (1.0761)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [211]  [1600/2502]  eta: 0:05:02  lr: 0.000906  min_lr: 0.000906  loss: 3.2111 (3.1073)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1100 (1.0755)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [211]  [1800/2502]  eta: 0:03:55  lr: 0.000904  min_lr: 0.000904  loss: 3.3116 (3.1117)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0719 (1.0820)  time: 0.3328  data: 0.0005  max mem: 27255
Epoch: [211]  [2000/2502]  eta: 0:02:48  lr: 0.000903  min_lr: 0.000903  loss: 3.2013 (3.1156)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0331 (1.0809)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [211]  [2200/2502]  eta: 0:01:41  lr: 0.000901  min_lr: 0.000901  loss: 3.3804 (3.1179)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0658 (1.0867)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [211]  [2400/2502]  eta: 0:00:34  lr: 0.000900  min_lr: 0.000900  loss: 3.3197 (3.1191)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1285 (1.0922)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [211]  [2501/2502]  eta: 0:00:00  lr: 0.000899  min_lr: 0.000899  loss: 3.2657 (3.1189)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1403 (1.0936)  time: 0.3003  data: 0.0009  max mem: 27255
Epoch: [211] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.000899  min_lr: 0.000899  loss: 3.2657 (3.1216)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1403 (1.0936)
Test:  [ 0/50]  eta: 0:02:13  loss: 0.5700 (0.5700)  acc1: 92.8000 (92.8000)  acc5: 99.2000 (99.2000)  time: 2.6646  data: 2.4765  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8419 (0.8859)  acc1: 86.4000 (85.3455)  acc5: 98.4000 (97.4909)  time: 0.4222  data: 0.2560  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8456 (0.8885)  acc1: 82.8000 (84.5333)  acc5: 97.6000 (97.4857)  time: 0.2050  data: 0.0382  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0293 (0.9834)  acc1: 80.8000 (82.6710)  acc5: 96.0000 (96.2581)  time: 0.2143  data: 0.0450  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1972 (1.0397)  acc1: 76.8000 (81.1122)  acc5: 93.2000 (95.6878)  time: 0.2297  data: 0.0634  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1851 (1.0534)  acc1: 76.0000 (80.6640)  acc5: 94.0000 (95.5760)  time: 0.2038  data: 0.0396  max mem: 27255
Test: Total time: 0:00:12 (0.2586 s / it)
* Acc@1 80.966 Acc@5 95.718 loss 1.052
Accuracy of the model on the 50000 test images: 81.0%
Max accuracy: 80.97%
Epoch: [212]  [   0/2502]  eta: 1:30:25  lr: 0.000899  min_lr: 0.000899  loss: 2.9639 (2.9639)  weight_decay: 0.0500 (0.0500)  time: 2.1685  data: 1.8217  max mem: 27255
Epoch: [212]  [ 200/2502]  eta: 0:13:11  lr: 0.000897  min_lr: 0.000897  loss: 2.9018 (3.0416)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0980 (1.0795)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [212]  [ 400/2502]  eta: 0:11:53  lr: 0.000896  min_lr: 0.000896  loss: 3.1532 (3.0979)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1713 (1.1062)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [212]  [ 600/2502]  eta: 0:10:42  lr: 0.000894  min_lr: 0.000894  loss: 3.0840 (3.0999)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1400 (1.1029)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [212]  [ 800/2502]  eta: 0:09:34  lr: 0.000893  min_lr: 0.000893  loss: 3.3565 (3.0906)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1212 (1.1123)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [212]  [1000/2502]  eta: 0:08:26  lr: 0.000892  min_lr: 0.000892  loss: 2.7840 (3.0990)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0972 (1.1101)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [212]  [1200/2502]  eta: 0:07:18  lr: 0.000890  min_lr: 0.000890  loss: 3.3067 (3.1121)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1844 (1.1194)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [212]  [1400/2502]  eta: 0:06:10  lr: 0.000889  min_lr: 0.000889  loss: 3.0068 (3.1094)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0056 (1.1155)  time: 0.3362  data: 0.0004  max mem: 27255
Epoch: [212]  [1600/2502]  eta: 0:05:02  lr: 0.000887  min_lr: 0.000887  loss: 3.3807 (3.1146)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0053 (1.1133)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [212]  [1800/2502]  eta: 0:03:55  lr: 0.000886  min_lr: 0.000886  loss: 3.2044 (3.1172)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1156 (1.1140)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [212]  [2000/2502]  eta: 0:02:48  lr: 0.000884  min_lr: 0.000884  loss: 3.0173 (3.1248)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0220 (1.1111)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [212]  [2200/2502]  eta: 0:01:41  lr: 0.000883  min_lr: 0.000883  loss: 3.3939 (3.1256)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0948 (1.1094)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [212]  [2400/2502]  eta: 0:00:34  lr: 0.000881  min_lr: 0.000881  loss: 3.1761 (3.1252)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0743 (1.1079)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [212]  [2501/2502]  eta: 0:00:00  lr: 0.000880  min_lr: 0.000880  loss: 3.2728 (3.1278)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1092 (1.1092)  time: 0.3006  data: 0.0008  max mem: 27255
Epoch: [212] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.000880  min_lr: 0.000880  loss: 3.2728 (3.1238)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1092 (1.1092)
Test:  [ 0/50]  eta: 0:02:21  loss: 0.5471 (0.5471)  acc1: 92.4000 (92.4000)  acc5: 99.2000 (99.2000)  time: 2.8369  data: 2.6395  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7895 (0.8402)  acc1: 86.0000 (85.8545)  acc5: 98.0000 (97.6727)  time: 0.4832  data: 0.3163  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8531 (0.8536)  acc1: 82.8000 (85.1619)  acc5: 98.0000 (97.6191)  time: 0.2321  data: 0.0682  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0218 (0.9612)  acc1: 80.4000 (82.9032)  acc5: 96.4000 (96.4129)  time: 0.2002  data: 0.0354  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1955 (1.0213)  acc1: 76.8000 (81.2878)  acc5: 94.0000 (95.8342)  time: 0.1740  data: 0.0092  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1955 (1.0412)  acc1: 76.4000 (80.6640)  acc5: 94.0000 (95.7520)  time: 0.1787  data: 0.0150  max mem: 27255
Test: Total time: 0:00:12 (0.2568 s / it)
* Acc@1 80.776 Acc@5 95.708 loss 1.040
Accuracy of the model on the 50000 test images: 80.8%
Max accuracy: 80.97%
Epoch: [213]  [   0/2502]  eta: 1:19:07  lr: 0.000880  min_lr: 0.000880  loss: 2.7275 (2.7275)  weight_decay: 0.0500 (0.0500)  time: 1.8974  data: 1.5699  max mem: 27255
Epoch: [213]  [ 200/2502]  eta: 0:13:07  lr: 0.000879  min_lr: 0.000879  loss: 3.0222 (3.1073)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1875 (1.1288)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [213]  [ 400/2502]  eta: 0:11:49  lr: 0.000877  min_lr: 0.000877  loss: 3.1632 (3.1066)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0668 (1.1431)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [213]  [ 600/2502]  eta: 0:10:40  lr: 0.000876  min_lr: 0.000876  loss: 3.0208 (3.0905)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0481 (1.1319)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [213]  [ 800/2502]  eta: 0:09:31  lr: 0.000874  min_lr: 0.000874  loss: 3.2712 (3.0995)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9840 (1.1101)  time: 0.3327  data: 0.0005  max mem: 27255
Epoch: [213]  [1000/2502]  eta: 0:08:23  lr: 0.000873  min_lr: 0.000873  loss: 3.1841 (3.0904)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1101 (1.1136)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [213]  [1200/2502]  eta: 0:07:16  lr: 0.000871  min_lr: 0.000871  loss: 2.9671 (3.0883)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0556 (1.1089)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [213]  [1400/2502]  eta: 0:06:09  lr: 0.000870  min_lr: 0.000870  loss: 3.3982 (3.0948)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0478 (1.1060)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [213]  [1600/2502]  eta: 0:05:02  lr: 0.000868  min_lr: 0.000868  loss: 3.0040 (3.0965)  weight_decay: 0.0500 (0.0500)  grad_norm: 0.9985 (1.1031)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [213]  [1800/2502]  eta: 0:03:55  lr: 0.000867  min_lr: 0.000867  loss: 3.3367 (3.0969)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0180 (1.0974)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [213]  [2000/2502]  eta: 0:02:48  lr: 0.000866  min_lr: 0.000866  loss: 3.0835 (3.1019)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1326 (1.0998)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [213]  [2200/2502]  eta: 0:01:41  lr: 0.000864  min_lr: 0.000864  loss: 3.4551 (3.1060)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1007 (1.1033)  time: 0.3380  data: 0.0004  max mem: 27255
Epoch: [213]  [2400/2502]  eta: 0:00:34  lr: 0.000863  min_lr: 0.000863  loss: 3.3359 (3.1095)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0973 (1.1021)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [213]  [2501/2502]  eta: 0:00:00  lr: 0.000862  min_lr: 0.000862  loss: 3.0221 (3.1118)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0649 (1.1024)  time: 0.3052  data: 0.0009  max mem: 27255
Epoch: [213] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.000862  min_lr: 0.000862  loss: 3.0221 (3.1122)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0649 (1.1024)
Test:  [ 0/50]  eta: 0:02:35  loss: 0.5574 (0.5574)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.1014  data: 2.9030  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.8726 (0.8583)  acc1: 86.0000 (86.1091)  acc5: 97.6000 (97.5636)  time: 0.4509  data: 0.2837  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8726 (0.8690)  acc1: 84.0000 (85.3524)  acc5: 97.6000 (97.6191)  time: 0.1928  data: 0.0287  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9850 (0.9628)  acc1: 82.4000 (83.0452)  acc5: 96.4000 (96.5032)  time: 0.2070  data: 0.0430  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1811 (1.0126)  acc1: 78.0000 (81.6390)  acc5: 93.6000 (95.9707)  time: 0.2073  data: 0.0434  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1600 (1.0313)  acc1: 78.0000 (80.9600)  acc5: 94.0000 (95.8320)  time: 0.1824  data: 0.0186  max mem: 27255
Test: Total time: 0:00:12 (0.2539 s / it)
* Acc@1 80.996 Acc@5 95.830 loss 1.027
Accuracy of the model on the 50000 test images: 81.0%
Max accuracy: 81.00%
Epoch: [214]  [   0/2502]  eta: 1:13:56  lr: 0.000862  min_lr: 0.000862  loss: 3.5711 (3.5711)  weight_decay: 0.0500 (0.0500)  time: 1.7732  data: 1.4185  max mem: 27255
Epoch: [214]  [ 200/2502]  eta: 0:13:05  lr: 0.000860  min_lr: 0.000860  loss: 3.2220 (3.0771)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0538 (1.0956)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [214]  [ 400/2502]  eta: 0:11:50  lr: 0.000859  min_lr: 0.000859  loss: 2.7669 (3.0970)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1538 (1.0874)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [214]  [ 600/2502]  eta: 0:10:40  lr: 0.000857  min_lr: 0.000857  loss: 3.2258 (3.0954)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0850 (1.1191)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [214]  [ 800/2502]  eta: 0:09:32  lr: 0.000856  min_lr: 0.000856  loss: 3.2777 (3.0935)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0658 (1.1090)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [214]  [1000/2502]  eta: 0:08:24  lr: 0.000854  min_lr: 0.000854  loss: 3.0505 (3.0848)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0871 (1.1147)  time: 0.3405  data: 0.0004  max mem: 27255
Epoch: [214]  [1200/2502]  eta: 0:07:17  lr: 0.000853  min_lr: 0.000853  loss: 3.2729 (3.0942)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0584 (1.1141)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [214]  [1400/2502]  eta: 0:06:09  lr: 0.000852  min_lr: 0.000852  loss: 3.3681 (3.0906)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1324 (1.1123)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [214]  [1600/2502]  eta: 0:05:02  lr: 0.000850  min_lr: 0.000850  loss: 3.2928 (3.0955)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0202 (1.1137)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [214]  [1800/2502]  eta: 0:03:55  lr: 0.000849  min_lr: 0.000849  loss: 3.1857 (3.1000)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1419 (1.1120)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [214]  [2000/2502]  eta: 0:02:48  lr: 0.000847  min_lr: 0.000847  loss: 3.0051 (3.1025)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0830 (1.1184)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [214]  [2200/2502]  eta: 0:01:41  lr: 0.000846  min_lr: 0.000846  loss: 3.0290 (3.1049)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1213 (1.1182)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [214]  [2400/2502]  eta: 0:00:34  lr: 0.000844  min_lr: 0.000844  loss: 2.5259 (3.0972)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1264 (1.1212)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [214]  [2501/2502]  eta: 0:00:00  lr: 0.000843  min_lr: 0.000843  loss: 2.8910 (3.0939)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0501 (1.1190)  time: 0.3005  data: 0.0007  max mem: 27255
Epoch: [214] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.000843  min_lr: 0.000843  loss: 2.8910 (3.1126)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0501 (1.1190)
Test:  [ 0/50]  eta: 0:02:41  loss: 0.4861 (0.4861)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 3.2217  data: 3.0301  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7518 (0.8047)  acc1: 87.2000 (85.6727)  acc5: 97.6000 (97.5273)  time: 0.4424  data: 0.2759  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7850 (0.8293)  acc1: 83.6000 (84.9143)  acc5: 97.6000 (97.5619)  time: 0.1751  data: 0.0079  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9857 (0.9261)  acc1: 82.4000 (82.7613)  acc5: 96.4000 (96.2323)  time: 0.1887  data: 0.0206  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1483 (0.9813)  acc1: 76.8000 (81.2585)  acc5: 93.2000 (95.6878)  time: 0.1955  data: 0.0310  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1345 (0.9964)  acc1: 76.8000 (80.7440)  acc5: 93.6000 (95.5840)  time: 0.1951  data: 0.0316  max mem: 27255
Test: Total time: 0:00:12 (0.2496 s / it)
* Acc@1 80.910 Acc@5 95.768 loss 0.994
Accuracy of the model on the 50000 test images: 80.9%
Max accuracy: 81.00%
Epoch: [215]  [   0/2502]  eta: 1:34:06  lr: 0.000843  min_lr: 0.000843  loss: 3.0531 (3.0531)  weight_decay: 0.0500 (0.0500)  time: 2.2568  data: 1.5316  max mem: 27255
Epoch: [215]  [ 200/2502]  eta: 0:13:14  lr: 0.000842  min_lr: 0.000842  loss: 3.0620 (3.0641)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1061 (1.0777)  time: 0.3412  data: 0.0004  max mem: 27255
Epoch: [215]  [ 400/2502]  eta: 0:11:54  lr: 0.000841  min_lr: 0.000841  loss: 3.3504 (3.0836)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0680 (1.0818)  time: 0.3429  data: 0.0005  max mem: 27255
Epoch: [215]  [ 600/2502]  eta: 0:10:43  lr: 0.000839  min_lr: 0.000839  loss: 3.3004 (3.0746)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2213 (1.1246)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [215]  [ 800/2502]  eta: 0:09:33  lr: 0.000838  min_lr: 0.000838  loss: 2.9202 (3.0809)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1023 (1.1354)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [215]  [1000/2502]  eta: 0:08:25  lr: 0.000836  min_lr: 0.000836  loss: 3.1888 (3.0812)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0990 (1.1354)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [215]  [1200/2502]  eta: 0:07:17  lr: 0.000835  min_lr: 0.000835  loss: 3.1959 (3.0796)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1306 (1.1452)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [215]  [1400/2502]  eta: 0:06:10  lr: 0.000833  min_lr: 0.000833  loss: 3.3363 (3.0824)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1512 (1.1466)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [215]  [1600/2502]  eta: 0:05:02  lr: 0.000832  min_lr: 0.000832  loss: 3.2199 (3.0933)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1692 (1.1472)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [215]  [1800/2502]  eta: 0:03:55  lr: 0.000830  min_lr: 0.000830  loss: 3.0952 (3.1008)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1994 (1.1476)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [215]  [2000/2502]  eta: 0:02:48  lr: 0.000829  min_lr: 0.000829  loss: 3.3304 (3.0997)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0563 (1.1454)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [215]  [2200/2502]  eta: 0:01:41  lr: 0.000827  min_lr: 0.000827  loss: 3.2485 (3.1004)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1372 (1.1506)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [215]  [2400/2502]  eta: 0:00:34  lr: 0.000826  min_lr: 0.000826  loss: 3.1542 (3.1045)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0801 (1.1497)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [215]  [2501/2502]  eta: 0:00:00  lr: 0.000825  min_lr: 0.000825  loss: 3.2769 (3.1072)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0718 (1.1500)  time: 0.3003  data: 0.0008  max mem: 27255
Epoch: [215] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.000825  min_lr: 0.000825  loss: 3.2769 (3.1034)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0718 (1.1500)
Test:  [ 0/50]  eta: 0:02:26  loss: 0.5692 (0.5692)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 2.9306  data: 2.7345  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8377 (0.8530)  acc1: 88.4000 (86.2182)  acc5: 98.0000 (97.6364)  time: 0.4230  data: 0.2560  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8602 (0.8748)  acc1: 84.0000 (85.2571)  acc5: 97.6000 (97.6191)  time: 0.1924  data: 0.0285  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0112 (0.9672)  acc1: 80.8000 (83.2645)  acc5: 95.2000 (96.3355)  time: 0.2513  data: 0.0877  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1812 (1.0181)  acc1: 78.0000 (81.7951)  acc5: 93.6000 (95.8439)  time: 0.2298  data: 0.0662  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1635 (1.0345)  acc1: 76.4000 (81.0960)  acc5: 93.6000 (95.7440)  time: 0.1695  data: 0.0059  max mem: 27255
Test: Total time: 0:00:12 (0.2599 s / it)
* Acc@1 81.084 Acc@5 95.864 loss 1.031
Accuracy of the model on the 50000 test images: 81.1%
Max accuracy: 81.08%
Epoch: [216]  [   0/2502]  eta: 1:37:55  lr: 0.000825  min_lr: 0.000825  loss: 3.2473 (3.2473)  weight_decay: 0.0500 (0.0500)  time: 2.3485  data: 1.9941  max mem: 27255
Epoch: [216]  [ 200/2502]  eta: 0:13:10  lr: 0.000824  min_lr: 0.000824  loss: 3.0265 (3.0745)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0853 (1.1172)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [216]  [ 400/2502]  eta: 0:11:52  lr: 0.000822  min_lr: 0.000822  loss: 3.1526 (3.1034)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1196 (1.1117)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [216]  [ 600/2502]  eta: 0:10:41  lr: 0.000821  min_lr: 0.000821  loss: 3.3559 (3.0903)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1511 (1.1237)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [216]  [ 800/2502]  eta: 0:09:32  lr: 0.000819  min_lr: 0.000819  loss: 3.0777 (3.0983)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0956 (1.1263)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [216]  [1000/2502]  eta: 0:08:24  lr: 0.000818  min_lr: 0.000818  loss: 3.1626 (3.1011)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1605 (1.1298)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [216]  [1200/2502]  eta: 0:07:17  lr: 0.000817  min_lr: 0.000817  loss: 3.3181 (3.0964)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0380 (1.1299)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [216]  [1400/2502]  eta: 0:06:09  lr: 0.000815  min_lr: 0.000815  loss: 3.2656 (3.0961)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0296 (1.1323)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [216]  [1600/2502]  eta: 0:05:02  lr: 0.000814  min_lr: 0.000814  loss: 3.0036 (3.0981)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1518 (1.1410)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [216]  [1800/2502]  eta: 0:03:55  lr: 0.000812  min_lr: 0.000812  loss: 3.2532 (3.0949)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0967 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [216]  [2000/2502]  eta: 0:02:48  lr: 0.000811  min_lr: 0.000811  loss: 3.1492 (3.1014)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1716 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [216]  [2200/2502]  eta: 0:01:41  lr: 0.000809  min_lr: 0.000809  loss: 2.9882 (3.0982)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1444 (inf)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [216]  [2400/2502]  eta: 0:00:34  lr: 0.000808  min_lr: 0.000808  loss: 3.2889 (3.0997)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1048 (inf)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [216]  [2501/2502]  eta: 0:00:00  lr: 0.000807  min_lr: 0.000807  loss: 3.1966 (3.1001)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0162 (inf)  time: 0.3004  data: 0.0007  max mem: 27255
Epoch: [216] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.000807  min_lr: 0.000807  loss: 3.1966 (3.0927)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0162 (inf)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.5385 (0.5385)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 2.8108  data: 2.6155  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7930 (0.8325)  acc1: 88.8000 (85.9273)  acc5: 98.0000 (97.4545)  time: 0.4531  data: 0.2811  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8012 (0.8479)  acc1: 83.2000 (84.9143)  acc5: 97.6000 (97.6000)  time: 0.2174  data: 0.0506  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0625 (0.9492)  acc1: 80.8000 (83.0968)  acc5: 94.8000 (96.2839)  time: 0.2301  data: 0.0640  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1484 (0.9987)  acc1: 78.4000 (81.8439)  acc5: 93.6000 (95.8732)  time: 0.2033  data: 0.0373  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1122 (1.0204)  acc1: 78.0000 (81.2320)  acc5: 94.8000 (95.8320)  time: 0.1640  data: 0.0001  max mem: 27255
Test: Total time: 0:00:12 (0.2557 s / it)
* Acc@1 81.200 Acc@5 95.840 loss 1.020
Accuracy of the model on the 50000 test images: 81.2%
Max accuracy: 81.20%
Epoch: [217]  [   0/2502]  eta: 1:15:39  lr: 0.000807  min_lr: 0.000807  loss: 3.3085 (3.3085)  weight_decay: 0.0500 (0.0500)  time: 1.8142  data: 1.4637  max mem: 27255
Epoch: [217]  [ 200/2502]  eta: 0:13:09  lr: 0.000806  min_lr: 0.000806  loss: 3.2526 (3.1096)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1245 (1.1570)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [217]  [ 400/2502]  eta: 0:11:52  lr: 0.000804  min_lr: 0.000804  loss: 2.9957 (3.0945)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0997 (1.1507)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [217]  [ 600/2502]  eta: 0:10:42  lr: 0.000803  min_lr: 0.000803  loss: 3.1635 (3.0924)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0751 (1.1568)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [217]  [ 800/2502]  eta: 0:09:33  lr: 0.000801  min_lr: 0.000801  loss: 3.2056 (3.0801)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0962 (1.1455)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [217]  [1000/2502]  eta: 0:08:25  lr: 0.000800  min_lr: 0.000800  loss: 3.2856 (3.0740)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0935 (1.1553)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [217]  [1200/2502]  eta: 0:07:17  lr: 0.000799  min_lr: 0.000799  loss: 3.1320 (3.0788)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0976 (1.1494)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [217]  [1400/2502]  eta: 0:06:10  lr: 0.000797  min_lr: 0.000797  loss: 3.1650 (3.0859)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1107 (1.1491)  time: 0.3422  data: 0.0004  max mem: 27255
Epoch: [217]  [1600/2502]  eta: 0:05:03  lr: 0.000796  min_lr: 0.000796  loss: 3.0616 (3.0902)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1563 (1.1481)  time: 0.3434  data: 0.0004  max mem: 27255
Epoch: [217]  [1800/2502]  eta: 0:03:55  lr: 0.000794  min_lr: 0.000794  loss: 3.0062 (3.0869)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1502 (1.1495)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [217]  [2000/2502]  eta: 0:02:48  lr: 0.000793  min_lr: 0.000793  loss: 3.2667 (3.0834)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1397 (1.1424)  time: 0.3355  data: 0.0005  max mem: 27255
Epoch: [217]  [2200/2502]  eta: 0:01:41  lr: 0.000791  min_lr: 0.000791  loss: 3.2623 (3.0917)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1231 (1.1419)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [217]  [2400/2502]  eta: 0:00:34  lr: 0.000790  min_lr: 0.000790  loss: 3.2481 (3.0907)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1742 (1.1441)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [217]  [2501/2502]  eta: 0:00:00  lr: 0.000789  min_lr: 0.000789  loss: 3.3470 (3.0939)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1380 (1.1437)  time: 0.3012  data: 0.0007  max mem: 27255
Epoch: [217] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.000789  min_lr: 0.000789  loss: 3.3470 (3.0882)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1380 (1.1437)
Test:  [ 0/50]  eta: 0:02:10  loss: 0.6993 (0.6993)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 2.6040  data: 2.4073  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.9916 (1.0011)  acc1: 86.8000 (85.1273)  acc5: 97.6000 (97.5273)  time: 0.4552  data: 0.2884  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 1.0071 (1.0173)  acc1: 83.6000 (84.5714)  acc5: 97.6000 (97.5429)  time: 0.2429  data: 0.0792  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.1639 (1.1083)  acc1: 80.4000 (82.7871)  acc5: 95.6000 (96.4387)  time: 0.2550  data: 0.0913  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.3241 (1.1561)  acc1: 78.0000 (81.4146)  acc5: 94.4000 (96.0000)  time: 0.2329  data: 0.0694  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2988 (1.1681)  acc1: 77.6000 (80.7760)  acc5: 94.8000 (95.8880)  time: 0.1825  data: 0.0190  max mem: 27255
Test: Total time: 0:00:13 (0.2736 s / it)
* Acc@1 81.070 Acc@5 95.814 loss 1.168
Accuracy of the model on the 50000 test images: 81.1%
Max accuracy: 81.20%
Epoch: [218]  [   0/2502]  eta: 1:20:24  lr: 0.000789  min_lr: 0.000789  loss: 3.1912 (3.1912)  weight_decay: 0.0500 (0.0500)  time: 1.9282  data: 1.5919  max mem: 27255
Epoch: [218]  [ 200/2502]  eta: 0:13:12  lr: 0.000788  min_lr: 0.000788  loss: 3.3420 (3.0625)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1308 (1.1664)  time: 0.3430  data: 0.0004  max mem: 27255
Epoch: [218]  [ 400/2502]  eta: 0:11:53  lr: 0.000786  min_lr: 0.000786  loss: 3.2338 (3.0847)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0280 (1.1410)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [218]  [ 600/2502]  eta: 0:10:42  lr: 0.000785  min_lr: 0.000785  loss: 3.2138 (3.0900)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1717 (1.1477)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [218]  [ 800/2502]  eta: 0:09:33  lr: 0.000784  min_lr: 0.000784  loss: 3.3555 (3.0881)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0564 (1.1487)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [218]  [1000/2502]  eta: 0:08:24  lr: 0.000782  min_lr: 0.000782  loss: 3.0737 (3.0822)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1333 (1.1461)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [218]  [1200/2502]  eta: 0:07:17  lr: 0.000781  min_lr: 0.000781  loss: 3.0776 (3.0795)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0896 (1.1433)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [218]  [1400/2502]  eta: 0:06:09  lr: 0.000779  min_lr: 0.000779  loss: 2.9086 (3.0840)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1210 (1.1419)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [218]  [1600/2502]  eta: 0:05:02  lr: 0.000778  min_lr: 0.000778  loss: 3.1102 (3.0892)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1050 (1.1382)  time: 0.3365  data: 0.0005  max mem: 27255
Epoch: [218]  [1800/2502]  eta: 0:03:55  lr: 0.000776  min_lr: 0.000776  loss: 3.0450 (3.0931)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1952 (1.1386)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [218]  [2000/2502]  eta: 0:02:48  lr: 0.000775  min_lr: 0.000775  loss: 3.2659 (3.0981)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1263 (1.1417)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [218]  [2200/2502]  eta: 0:01:41  lr: 0.000774  min_lr: 0.000774  loss: 3.2311 (3.0981)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1388 (1.1444)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [218]  [2400/2502]  eta: 0:00:34  lr: 0.000772  min_lr: 0.000772  loss: 3.0629 (3.0960)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1499 (1.1475)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [218]  [2501/2502]  eta: 0:00:00  lr: 0.000771  min_lr: 0.000771  loss: 3.1410 (3.0945)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1656 (1.1484)  time: 0.3107  data: 0.0008  max mem: 27255
Epoch: [218] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.000771  min_lr: 0.000771  loss: 3.1410 (3.0867)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1656 (1.1484)
Test:  [ 0/50]  eta: 0:02:10  loss: 0.5014 (0.5014)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 2.6054  data: 2.4004  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8100 (0.8043)  acc1: 88.8000 (86.4727)  acc5: 98.0000 (97.5636)  time: 0.4450  data: 0.2773  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8279 (0.8311)  acc1: 83.2000 (85.1429)  acc5: 97.6000 (97.6762)  time: 0.2052  data: 0.0412  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9387 (0.9191)  acc1: 80.4000 (83.3032)  acc5: 96.0000 (96.5419)  time: 0.1833  data: 0.0176  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1384 (0.9736)  acc1: 76.8000 (81.8927)  acc5: 93.6000 (95.9805)  time: 0.1940  data: 0.0275  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1198 (0.9874)  acc1: 76.8000 (81.2400)  acc5: 94.0000 (95.8720)  time: 0.1844  data: 0.0187  max mem: 27255
Test: Total time: 0:00:12 (0.2437 s / it)
* Acc@1 81.326 Acc@5 95.946 loss 0.989
Accuracy of the model on the 50000 test images: 81.3%
Max accuracy: 81.33%
Epoch: [219]  [   0/2502]  eta: 1:29:54  lr: 0.000771  min_lr: 0.000771  loss: 3.1074 (3.1074)  weight_decay: 0.0500 (0.0500)  time: 2.1560  data: 1.8091  max mem: 27255
Epoch: [219]  [ 200/2502]  eta: 0:13:12  lr: 0.000770  min_lr: 0.000770  loss: 3.0212 (3.0497)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2728 (1.2237)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [219]  [ 400/2502]  eta: 0:11:53  lr: 0.000769  min_lr: 0.000769  loss: 3.3208 (3.0637)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0588 (1.1908)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [219]  [ 600/2502]  eta: 0:10:42  lr: 0.000767  min_lr: 0.000767  loss: 3.0649 (3.0723)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1187 (1.1592)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [219]  [ 800/2502]  eta: 0:09:33  lr: 0.000766  min_lr: 0.000766  loss: 3.0973 (3.0684)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1209 (1.1529)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [219]  [1000/2502]  eta: 0:08:25  lr: 0.000764  min_lr: 0.000764  loss: 3.2460 (3.0564)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1052 (1.1571)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [219]  [1200/2502]  eta: 0:07:17  lr: 0.000763  min_lr: 0.000763  loss: 3.0754 (3.0558)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0642 (1.1633)  time: 0.3398  data: 0.0005  max mem: 27255
Epoch: [219]  [1400/2502]  eta: 0:06:10  lr: 0.000762  min_lr: 0.000762  loss: 3.1166 (3.0625)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2755 (1.1749)  time: 0.3367  data: 0.0005  max mem: 27255
Epoch: [219]  [1600/2502]  eta: 0:05:03  lr: 0.000760  min_lr: 0.000760  loss: 3.2540 (3.0670)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0995 (1.1693)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [219]  [1800/2502]  eta: 0:03:55  lr: 0.000759  min_lr: 0.000759  loss: 3.0992 (3.0727)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2041 (1.1696)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [219]  [2000/2502]  eta: 0:02:48  lr: 0.000757  min_lr: 0.000757  loss: 2.8277 (3.0742)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1675 (1.1727)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [219]  [2200/2502]  eta: 0:01:41  lr: 0.000756  min_lr: 0.000756  loss: 2.9751 (3.0749)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1110 (1.1664)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [219]  [2400/2502]  eta: 0:00:34  lr: 0.000755  min_lr: 0.000755  loss: 2.9183 (3.0728)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1881 (1.1688)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [219]  [2501/2502]  eta: 0:00:00  lr: 0.000754  min_lr: 0.000754  loss: 3.1212 (3.0723)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1480 (1.1692)  time: 0.3003  data: 0.0007  max mem: 27255
Epoch: [219] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.000754  min_lr: 0.000754  loss: 3.1212 (3.0752)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1480 (1.1692)
Test:  [ 0/50]  eta: 0:02:14  loss: 0.5150 (0.5150)  acc1: 94.8000 (94.8000)  acc5: 98.8000 (98.8000)  time: 2.6880  data: 2.4899  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7948 (0.7838)  acc1: 87.6000 (87.0182)  acc5: 97.6000 (97.7818)  time: 0.4401  data: 0.2696  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7948 (0.8085)  acc1: 83.2000 (85.4857)  acc5: 97.6000 (97.7524)  time: 0.1963  data: 0.0305  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9445 (0.8985)  acc1: 82.0000 (83.4581)  acc5: 96.8000 (96.6581)  time: 0.2243  data: 0.0606  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0979 (0.9498)  acc1: 78.8000 (82.1854)  acc5: 94.0000 (96.1073)  time: 0.2197  data: 0.0561  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0988 (0.9655)  acc1: 77.6000 (81.6000)  acc5: 94.4000 (95.9680)  time: 0.1658  data: 0.0023  max mem: 27255
Test: Total time: 0:00:12 (0.2517 s / it)
* Acc@1 81.474 Acc@5 95.954 loss 0.966
Accuracy of the model on the 50000 test images: 81.5%
Max accuracy: 81.47%
Epoch: [220]  [   0/2502]  eta: 1:35:57  lr: 0.000754  min_lr: 0.000754  loss: 3.2152 (3.2152)  weight_decay: 0.0500 (0.0500)  time: 2.3012  data: 1.9498  max mem: 27255
Epoch: [220]  [ 200/2502]  eta: 0:13:13  lr: 0.000752  min_lr: 0.000752  loss: 2.9888 (3.0805)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2087 (1.1829)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [220]  [ 400/2502]  eta: 0:11:53  lr: 0.000751  min_lr: 0.000751  loss: 2.9439 (3.0887)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1307 (1.1711)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [220]  [ 600/2502]  eta: 0:10:41  lr: 0.000750  min_lr: 0.000750  loss: 3.0061 (3.0869)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2239 (1.1777)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [220]  [ 800/2502]  eta: 0:09:33  lr: 0.000748  min_lr: 0.000748  loss: 2.6548 (3.0823)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1747 (1.1691)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [220]  [1000/2502]  eta: 0:08:24  lr: 0.000747  min_lr: 0.000747  loss: 3.1441 (3.0780)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1458 (1.1690)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [220]  [1200/2502]  eta: 0:07:16  lr: 0.000745  min_lr: 0.000745  loss: 3.1569 (3.0780)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1973 (1.1752)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [220]  [1400/2502]  eta: 0:06:09  lr: 0.000744  min_lr: 0.000744  loss: 3.0449 (3.0810)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0972 (1.1702)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [220]  [1600/2502]  eta: 0:05:02  lr: 0.000743  min_lr: 0.000743  loss: 3.2751 (3.0780)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0945 (1.1698)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [220]  [1800/2502]  eta: 0:03:55  lr: 0.000741  min_lr: 0.000741  loss: 3.2595 (3.0803)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1707 (1.1665)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [220]  [2000/2502]  eta: 0:02:48  lr: 0.000740  min_lr: 0.000740  loss: 3.0558 (3.0811)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1352 (1.1662)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [220]  [2200/2502]  eta: 0:01:41  lr: 0.000738  min_lr: 0.000738  loss: 3.0664 (3.0819)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2054 (1.1664)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [220]  [2400/2502]  eta: 0:00:34  lr: 0.000737  min_lr: 0.000737  loss: 2.9261 (3.0762)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2142 (1.1718)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [220]  [2501/2502]  eta: 0:00:00  lr: 0.000736  min_lr: 0.000736  loss: 3.3092 (3.0772)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2204 (1.1758)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [220] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.000736  min_lr: 0.000736  loss: 3.3092 (3.0722)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2204 (1.1758)
Test:  [ 0/50]  eta: 0:02:17  loss: 0.5641 (0.5641)  acc1: 92.4000 (92.4000)  acc5: 98.8000 (98.8000)  time: 2.7409  data: 2.5279  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8116 (0.8047)  acc1: 87.6000 (86.7273)  acc5: 98.4000 (97.4909)  time: 0.4168  data: 0.2485  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8116 (0.8349)  acc1: 83.2000 (85.2952)  acc5: 97.6000 (97.5048)  time: 0.1904  data: 0.0259  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9696 (0.9307)  acc1: 82.0000 (83.4194)  acc5: 95.6000 (96.4516)  time: 0.2178  data: 0.0533  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1561 (0.9850)  acc1: 78.8000 (82.0585)  acc5: 94.0000 (95.9512)  time: 0.2521  data: 0.0864  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1404 (1.0072)  acc1: 78.0000 (81.2880)  acc5: 94.4000 (95.7920)  time: 0.2167  data: 0.0488  max mem: 27255
Test: Total time: 0:00:13 (0.2640 s / it)
* Acc@1 81.394 Acc@5 95.890 loss 1.006
Accuracy of the model on the 50000 test images: 81.4%
Max accuracy: 81.47%
Epoch: [221]  [   0/2502]  eta: 1:28:41  lr: 0.000736  min_lr: 0.000736  loss: 2.3791 (2.3791)  weight_decay: 0.0500 (0.0500)  time: 2.1270  data: 1.7859  max mem: 27255
Epoch: [221]  [ 200/2502]  eta: 0:13:11  lr: 0.000735  min_lr: 0.000735  loss: 3.2464 (3.0879)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1329 (1.1398)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [221]  [ 400/2502]  eta: 0:11:52  lr: 0.000734  min_lr: 0.000734  loss: 3.0528 (3.0840)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0681 (1.1567)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [221]  [ 600/2502]  eta: 0:10:42  lr: 0.000732  min_lr: 0.000732  loss: 3.1675 (3.0856)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2255 (1.1728)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [221]  [ 800/2502]  eta: 0:09:33  lr: 0.000731  min_lr: 0.000731  loss: 2.9932 (3.0691)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2320 (1.1953)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [221]  [1000/2502]  eta: 0:08:25  lr: 0.000729  min_lr: 0.000729  loss: 2.9243 (3.0543)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2078 (1.1924)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [221]  [1200/2502]  eta: 0:07:17  lr: 0.000728  min_lr: 0.000728  loss: 3.2007 (3.0554)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1720 (1.1874)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [221]  [1400/2502]  eta: 0:06:10  lr: 0.000727  min_lr: 0.000727  loss: 3.3775 (3.0589)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1717 (1.1798)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [221]  [1600/2502]  eta: 0:05:02  lr: 0.000725  min_lr: 0.000725  loss: 3.3359 (3.0609)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1237 (1.1857)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [221]  [1800/2502]  eta: 0:03:55  lr: 0.000724  min_lr: 0.000724  loss: 3.2903 (3.0617)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1117 (1.1842)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [221]  [2000/2502]  eta: 0:02:48  lr: 0.000723  min_lr: 0.000723  loss: 3.1293 (3.0622)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1704 (1.1887)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [221]  [2200/2502]  eta: 0:01:41  lr: 0.000721  min_lr: 0.000721  loss: 3.1230 (3.0602)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1923 (1.1865)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [221]  [2400/2502]  eta: 0:00:34  lr: 0.000720  min_lr: 0.000720  loss: 3.0468 (3.0573)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1669 (1.1856)  time: 0.3421  data: 0.0004  max mem: 27255
Epoch: [221]  [2501/2502]  eta: 0:00:00  lr: 0.000719  min_lr: 0.000719  loss: 2.7650 (3.0567)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1221 (1.1864)  time: 0.2997  data: 0.0006  max mem: 27255
Epoch: [221] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.000719  min_lr: 0.000719  loss: 2.7650 (3.0602)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1221 (1.1864)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.5057 (0.5057)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.0493  data: 2.8595  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7718 (0.7832)  acc1: 86.8000 (86.2182)  acc5: 97.6000 (97.5273)  time: 0.4736  data: 0.3074  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7923 (0.8192)  acc1: 82.8000 (85.0286)  acc5: 97.6000 (97.5810)  time: 0.2149  data: 0.0509  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9527 (0.9066)  acc1: 80.4000 (83.2387)  acc5: 96.4000 (96.5032)  time: 0.2148  data: 0.0505  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1112 (0.9539)  acc1: 78.0000 (81.9317)  acc5: 94.0000 (96.0585)  time: 0.1949  data: 0.0307  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0941 (0.9738)  acc1: 78.0000 (81.2880)  acc5: 94.8000 (95.9680)  time: 0.1828  data: 0.0190  max mem: 27255
Test: Total time: 0:00:12 (0.2560 s / it)
* Acc@1 81.426 Acc@5 95.914 loss 0.970
Accuracy of the model on the 50000 test images: 81.4%
Max accuracy: 81.47%
Epoch: [222]  [   0/2502]  eta: 1:33:49  lr: 0.000719  min_lr: 0.000719  loss: 2.2104 (2.2104)  weight_decay: 0.0500 (0.0500)  time: 2.2499  data: 1.8982  max mem: 27255
Epoch: [222]  [ 200/2502]  eta: 0:13:11  lr: 0.000718  min_lr: 0.000718  loss: 3.1264 (3.0889)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2557 (1.2568)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [222]  [ 400/2502]  eta: 0:11:52  lr: 0.000716  min_lr: 0.000716  loss: 3.2307 (3.0784)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1242 (1.2477)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [222]  [ 600/2502]  eta: 0:10:41  lr: 0.000715  min_lr: 0.000715  loss: 3.0277 (3.0540)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0687 (1.2272)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [222]  [ 800/2502]  eta: 0:09:32  lr: 0.000714  min_lr: 0.000714  loss: 3.3101 (3.0512)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0639 (1.1968)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [222]  [1000/2502]  eta: 0:08:25  lr: 0.000712  min_lr: 0.000712  loss: 3.3083 (3.0497)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1891 (1.1989)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [222]  [1200/2502]  eta: 0:07:17  lr: 0.000711  min_lr: 0.000711  loss: 3.2321 (3.0532)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1199 (1.1944)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [222]  [1400/2502]  eta: 0:06:10  lr: 0.000709  min_lr: 0.000709  loss: 3.0179 (3.0578)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0944 (1.1941)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [222]  [1600/2502]  eta: 0:05:02  lr: 0.000708  min_lr: 0.000708  loss: 3.1875 (3.0590)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1451 (1.1910)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [222]  [1800/2502]  eta: 0:03:55  lr: 0.000707  min_lr: 0.000707  loss: 3.2431 (3.0621)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2586 (1.1944)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [222]  [2000/2502]  eta: 0:02:48  lr: 0.000705  min_lr: 0.000705  loss: 3.0004 (3.0652)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1250 (1.1937)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [222]  [2200/2502]  eta: 0:01:41  lr: 0.000704  min_lr: 0.000704  loss: 3.3113 (3.0634)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2029 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [222]  [2400/2502]  eta: 0:00:34  lr: 0.000703  min_lr: 0.000703  loss: 3.3195 (3.0593)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2218 (inf)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [222]  [2501/2502]  eta: 0:00:00  lr: 0.000702  min_lr: 0.000702  loss: 3.0475 (3.0597)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1757 (inf)  time: 0.3001  data: 0.0008  max mem: 27255
Epoch: [222] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.000702  min_lr: 0.000702  loss: 3.0475 (3.0595)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1757 (inf)
Test:  [ 0/50]  eta: 0:02:08  loss: 0.4974 (0.4974)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 2.5717  data: 2.3765  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8213 (0.8012)  acc1: 85.2000 (86.2909)  acc5: 98.4000 (97.8545)  time: 0.4044  data: 0.2375  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8213 (0.8327)  acc1: 84.0000 (85.2952)  acc5: 98.0000 (97.8667)  time: 0.2094  data: 0.0448  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9855 (0.9195)  acc1: 82.4000 (83.5355)  acc5: 96.4000 (96.8387)  time: 0.2278  data: 0.0631  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0958 (0.9733)  acc1: 78.0000 (82.0781)  acc5: 94.4000 (96.3122)  time: 0.2216  data: 0.0577  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0958 (0.9970)  acc1: 78.0000 (81.3200)  acc5: 94.4000 (96.0880)  time: 0.1914  data: 0.0277  max mem: 27255
Test: Total time: 0:00:12 (0.2553 s / it)
* Acc@1 81.418 Acc@5 96.050 loss 0.999
Accuracy of the model on the 50000 test images: 81.4%
Max accuracy: 81.47%
Epoch: [223]  [   0/2502]  eta: 1:32:37  lr: 0.000702  min_lr: 0.000702  loss: 2.0247 (2.0247)  weight_decay: 0.0500 (0.0500)  time: 2.2213  data: 1.4743  max mem: 27255
Epoch: [223]  [ 200/2502]  eta: 0:13:11  lr: 0.000701  min_lr: 0.000701  loss: 3.1337 (3.0186)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1774 (1.2077)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [223]  [ 400/2502]  eta: 0:11:52  lr: 0.000699  min_lr: 0.000699  loss: 3.2766 (3.0554)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1773 (1.2090)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [223]  [ 600/2502]  eta: 0:10:41  lr: 0.000698  min_lr: 0.000698  loss: 3.0956 (3.0525)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0868 (1.2083)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [223]  [ 800/2502]  eta: 0:09:32  lr: 0.000696  min_lr: 0.000696  loss: 3.3261 (3.0591)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2319 (1.2173)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [223]  [1000/2502]  eta: 0:08:24  lr: 0.000695  min_lr: 0.000695  loss: 3.2001 (3.0664)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1805 (1.2306)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [223]  [1200/2502]  eta: 0:07:16  lr: 0.000694  min_lr: 0.000694  loss: 3.0844 (3.0628)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.0991 (1.2177)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [223]  [1400/2502]  eta: 0:06:09  lr: 0.000692  min_lr: 0.000692  loss: 2.9744 (3.0607)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2412 (1.2154)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [223]  [1600/2502]  eta: 0:05:02  lr: 0.000691  min_lr: 0.000691  loss: 3.1097 (3.0670)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1811 (1.2126)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [223]  [1800/2502]  eta: 0:03:55  lr: 0.000690  min_lr: 0.000690  loss: 2.8698 (3.0651)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1968 (1.2095)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [223]  [2000/2502]  eta: 0:02:48  lr: 0.000688  min_lr: 0.000688  loss: 3.3390 (3.0663)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2529 (1.2182)  time: 0.3364  data: 0.0004  max mem: 27255
Epoch: [223]  [2200/2502]  eta: 0:01:41  lr: 0.000687  min_lr: 0.000687  loss: 3.0575 (3.0644)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2476 (1.2199)  time: 0.3328  data: 0.0003  max mem: 27255
Epoch: [223]  [2400/2502]  eta: 0:00:34  lr: 0.000686  min_lr: 0.000686  loss: 3.1882 (3.0624)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2330 (1.2179)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [223]  [2501/2502]  eta: 0:00:00  lr: 0.000685  min_lr: 0.000685  loss: 3.2438 (3.0613)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1156 (1.2156)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [223] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.000685  min_lr: 0.000685  loss: 3.2438 (3.0536)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1156 (1.2156)
Test:  [ 0/50]  eta: 0:02:42  loss: 0.5270 (0.5270)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 3.2501  data: 3.0570  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7841 (0.8082)  acc1: 86.8000 (86.9091)  acc5: 97.6000 (97.4909)  time: 0.4803  data: 0.3137  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8091 (0.8275)  acc1: 83.6000 (85.6381)  acc5: 97.6000 (97.6191)  time: 0.2085  data: 0.0445  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9781 (0.9202)  acc1: 82.0000 (83.4710)  acc5: 96.0000 (96.4000)  time: 0.1960  data: 0.0320  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1448 (0.9750)  acc1: 78.4000 (82.0098)  acc5: 94.0000 (95.9610)  time: 0.1834  data: 0.0191  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1448 (0.9919)  acc1: 77.6000 (81.3200)  acc5: 94.4000 (95.8880)  time: 0.1862  data: 0.0188  max mem: 27255
Test: Total time: 0:00:12 (0.2547 s / it)
* Acc@1 81.448 Acc@5 95.946 loss 0.990
Accuracy of the model on the 50000 test images: 81.4%
Max accuracy: 81.47%
Epoch: [224]  [   0/2502]  eta: 1:22:21  lr: 0.000685  min_lr: 0.000685  loss: 3.2701 (3.2701)  weight_decay: 0.0500 (0.0500)  time: 1.9751  data: 1.6407  max mem: 27255
Epoch: [224]  [ 200/2502]  eta: 0:13:08  lr: 0.000684  min_lr: 0.000684  loss: 3.1638 (3.0944)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2080 (1.2414)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [224]  [ 400/2502]  eta: 0:11:51  lr: 0.000682  min_lr: 0.000682  loss: 3.2164 (3.0654)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1085 (1.2120)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [224]  [ 600/2502]  eta: 0:10:41  lr: 0.000681  min_lr: 0.000681  loss: 3.0028 (3.0632)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1731 (1.2113)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [224]  [ 800/2502]  eta: 0:09:32  lr: 0.000680  min_lr: 0.000680  loss: 3.2461 (3.0592)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3452 (1.2229)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [224]  [1000/2502]  eta: 0:08:25  lr: 0.000678  min_lr: 0.000678  loss: 3.1290 (3.0599)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1344 (1.2277)  time: 0.3377  data: 0.0004  max mem: 27255
Epoch: [224]  [1200/2502]  eta: 0:07:17  lr: 0.000677  min_lr: 0.000677  loss: 3.2672 (3.0605)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1884 (1.2238)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [224]  [1400/2502]  eta: 0:06:10  lr: 0.000676  min_lr: 0.000676  loss: 3.2269 (3.0605)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1535 (1.2206)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [224]  [1600/2502]  eta: 0:05:02  lr: 0.000674  min_lr: 0.000674  loss: 3.1144 (3.0602)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2590 (1.2236)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [224]  [1800/2502]  eta: 0:03:55  lr: 0.000673  min_lr: 0.000673  loss: 3.2013 (3.0647)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2299 (1.2252)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [224]  [2000/2502]  eta: 0:02:48  lr: 0.000671  min_lr: 0.000671  loss: 3.1111 (3.0664)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1599 (1.2267)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [224]  [2200/2502]  eta: 0:01:41  lr: 0.000670  min_lr: 0.000670  loss: 3.0228 (3.0621)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1524 (1.2235)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [224]  [2400/2502]  eta: 0:00:34  lr: 0.000669  min_lr: 0.000669  loss: 3.0080 (3.0591)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1106 (1.2188)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [224]  [2501/2502]  eta: 0:00:00  lr: 0.000668  min_lr: 0.000668  loss: 3.0668 (3.0596)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2239 (1.2200)  time: 0.3035  data: 0.0009  max mem: 27255
Epoch: [224] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.000668  min_lr: 0.000668  loss: 3.0668 (3.0454)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2239 (1.2200)
Test:  [ 0/50]  eta: 0:03:11  loss: 0.5221 (0.5221)  acc1: 93.2000 (93.2000)  acc5: 98.8000 (98.8000)  time: 3.8323  data: 3.6357  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.8046 (0.7925)  acc1: 88.4000 (86.7636)  acc5: 98.4000 (97.7818)  time: 0.4976  data: 0.3309  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8218 (0.8316)  acc1: 84.4000 (85.4857)  acc5: 98.0000 (97.8095)  time: 0.1642  data: 0.0005  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0123 (0.9257)  acc1: 82.0000 (83.7290)  acc5: 96.4000 (96.7484)  time: 0.1730  data: 0.0093  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1575 (0.9830)  acc1: 77.6000 (82.2244)  acc5: 94.0000 (96.0488)  time: 0.2135  data: 0.0498  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1761 (1.0058)  acc1: 76.0000 (81.6400)  acc5: 94.4000 (95.8960)  time: 0.2150  data: 0.0513  max mem: 27255
Test: Total time: 0:00:12 (0.2597 s / it)
* Acc@1 81.598 Acc@5 95.986 loss 1.006
Accuracy of the model on the 50000 test images: 81.6%
Max accuracy: 81.60%
Epoch: [225]  [   0/2502]  eta: 1:15:04  lr: 0.000668  min_lr: 0.000668  loss: 1.8103 (1.8103)  weight_decay: 0.0500 (0.0500)  time: 1.8004  data: 1.4583  max mem: 27255
Epoch: [225]  [ 200/2502]  eta: 0:13:07  lr: 0.000667  min_lr: 0.000667  loss: 3.2685 (3.0246)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1994 (1.1984)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [225]  [ 400/2502]  eta: 0:11:50  lr: 0.000665  min_lr: 0.000665  loss: 3.0819 (3.0454)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1895 (1.2097)  time: 0.3335  data: 0.0003  max mem: 27255
Epoch: [225]  [ 600/2502]  eta: 0:10:42  lr: 0.000664  min_lr: 0.000664  loss: 3.1851 (3.0232)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1458 (1.2200)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [225]  [ 800/2502]  eta: 0:09:33  lr: 0.000663  min_lr: 0.000663  loss: 3.2456 (3.0320)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1341 (1.2158)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [225]  [1000/2502]  eta: 0:08:25  lr: 0.000661  min_lr: 0.000661  loss: 3.1707 (3.0274)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1458 (1.2119)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [225]  [1200/2502]  eta: 0:07:17  lr: 0.000660  min_lr: 0.000660  loss: 3.1170 (3.0323)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2550 (1.2121)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [225]  [1400/2502]  eta: 0:06:10  lr: 0.000659  min_lr: 0.000659  loss: 3.1022 (3.0380)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2263 (1.2214)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [225]  [1600/2502]  eta: 0:05:03  lr: 0.000657  min_lr: 0.000657  loss: 3.2857 (3.0411)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2750 (1.2279)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [225]  [1800/2502]  eta: 0:03:55  lr: 0.000656  min_lr: 0.000656  loss: 3.0789 (3.0416)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1855 (1.2210)  time: 0.3360  data: 0.0005  max mem: 27255
Epoch: [225]  [2000/2502]  eta: 0:02:48  lr: 0.000655  min_lr: 0.000655  loss: 2.9154 (3.0413)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1731 (1.2197)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [225]  [2200/2502]  eta: 0:01:41  lr: 0.000653  min_lr: 0.000653  loss: 3.0669 (3.0409)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1535 (1.2158)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [225]  [2400/2502]  eta: 0:00:34  lr: 0.000652  min_lr: 0.000652  loss: 3.0329 (3.0388)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2050 (1.2194)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [225]  [2501/2502]  eta: 0:00:00  lr: 0.000652  min_lr: 0.000652  loss: 3.0071 (3.0383)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2817 (1.2219)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [225] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.000652  min_lr: 0.000652  loss: 3.0071 (3.0435)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2817 (1.2219)
Test:  [ 0/50]  eta: 0:02:45  loss: 0.4476 (0.4476)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.3001  data: 3.1078  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7431 (0.7247)  acc1: 86.8000 (87.1636)  acc5: 98.0000 (97.6727)  time: 0.4494  data: 0.2829  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7431 (0.7532)  acc1: 84.0000 (86.1714)  acc5: 98.0000 (97.7333)  time: 0.1738  data: 0.0098  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9226 (0.8434)  acc1: 81.6000 (84.0387)  acc5: 96.4000 (96.7226)  time: 0.1791  data: 0.0151  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0636 (0.9063)  acc1: 77.6000 (82.4293)  acc5: 94.0000 (96.0878)  time: 0.1882  data: 0.0243  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0741 (0.9250)  acc1: 76.4000 (81.7920)  acc5: 94.4000 (96.0080)  time: 0.1836  data: 0.0197  max mem: 27255
Test: Total time: 0:00:12 (0.2423 s / it)
* Acc@1 81.738 Acc@5 96.036 loss 0.928
Accuracy of the model on the 50000 test images: 81.7%
Max accuracy: 81.74%
Epoch: [226]  [   0/2502]  eta: 1:25:52  lr: 0.000651  min_lr: 0.000651  loss: 3.2479 (3.2479)  weight_decay: 0.0500 (0.0500)  time: 2.0595  data: 1.7041  max mem: 27255
Epoch: [226]  [ 200/2502]  eta: 0:13:12  lr: 0.000650  min_lr: 0.000650  loss: 3.1362 (3.0278)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2777 (1.3105)  time: 0.3360  data: 0.0006  max mem: 27255
Epoch: [226]  [ 400/2502]  eta: 0:11:54  lr: 0.000649  min_lr: 0.000649  loss: 3.1931 (3.0656)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2446 (1.2797)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [226]  [ 600/2502]  eta: 0:10:42  lr: 0.000648  min_lr: 0.000648  loss: 3.0505 (3.0584)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1214 (1.2444)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [226]  [ 800/2502]  eta: 0:09:34  lr: 0.000646  min_lr: 0.000646  loss: 2.8073 (3.0406)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1958 (1.2375)  time: 0.3494  data: 0.0005  max mem: 27255
Epoch: [226]  [1000/2502]  eta: 0:08:25  lr: 0.000645  min_lr: 0.000645  loss: 3.3309 (3.0463)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2048 (1.2378)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [226]  [1200/2502]  eta: 0:07:17  lr: 0.000644  min_lr: 0.000644  loss: 3.1200 (3.0352)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2319 (1.2419)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [226]  [1400/2502]  eta: 0:06:10  lr: 0.000642  min_lr: 0.000642  loss: 3.1608 (3.0261)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2260 (1.2451)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [226]  [1600/2502]  eta: 0:05:02  lr: 0.000641  min_lr: 0.000641  loss: 3.1934 (3.0296)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2199 (1.2430)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [226]  [1800/2502]  eta: 0:03:55  lr: 0.000640  min_lr: 0.000640  loss: 3.2351 (3.0363)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2417 (1.2425)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [226]  [2000/2502]  eta: 0:02:48  lr: 0.000638  min_lr: 0.000638  loss: 3.1388 (3.0366)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1649 (1.2441)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [226]  [2200/2502]  eta: 0:01:41  lr: 0.000637  min_lr: 0.000637  loss: 3.0203 (3.0380)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1025 (1.2393)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [226]  [2400/2502]  eta: 0:00:34  lr: 0.000636  min_lr: 0.000636  loss: 3.0612 (3.0398)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2273 (1.2383)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [226]  [2501/2502]  eta: 0:00:00  lr: 0.000635  min_lr: 0.000635  loss: 2.9148 (3.0383)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2246 (1.2389)  time: 0.3000  data: 0.0008  max mem: 27255
Epoch: [226] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.000635  min_lr: 0.000635  loss: 2.9148 (3.0357)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2246 (1.2389)
Test:  [ 0/50]  eta: 0:03:08  loss: 0.4791 (0.4791)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.7687  data: 3.5672  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7823 (0.7781)  acc1: 87.6000 (86.6909)  acc5: 98.0000 (97.7455)  time: 0.4941  data: 0.3264  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7823 (0.7988)  acc1: 84.4000 (86.0571)  acc5: 98.0000 (97.8095)  time: 0.1714  data: 0.0066  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9991 (0.9010)  acc1: 80.4000 (83.9742)  acc5: 95.6000 (96.6194)  time: 0.2064  data: 0.0399  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1398 (0.9614)  acc1: 78.8000 (82.4488)  acc5: 93.6000 (95.9902)  time: 0.2523  data: 0.0860  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1449 (0.9819)  acc1: 77.6000 (81.7760)  acc5: 94.0000 (95.9120)  time: 0.2158  data: 0.0517  max mem: 27255
Test: Total time: 0:00:13 (0.2761 s / it)
* Acc@1 81.704 Acc@5 95.978 loss 0.983
Accuracy of the model on the 50000 test images: 81.7%
Max accuracy: 81.74%
Epoch: [227]  [   0/2502]  eta: 1:32:12  lr: 0.000635  min_lr: 0.000635  loss: 1.8439 (1.8439)  weight_decay: 0.0500 (0.0500)  time: 2.2111  data: 1.6928  max mem: 27255
Epoch: [227]  [ 200/2502]  eta: 0:13:11  lr: 0.000634  min_lr: 0.000634  loss: 3.0101 (3.0270)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2181 (1.2731)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [227]  [ 400/2502]  eta: 0:11:53  lr: 0.000632  min_lr: 0.000632  loss: 3.2710 (3.0411)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2092 (1.2663)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [227]  [ 600/2502]  eta: 0:10:41  lr: 0.000631  min_lr: 0.000631  loss: 3.1272 (3.0501)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1340 (1.2656)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [227]  [ 800/2502]  eta: 0:09:32  lr: 0.000630  min_lr: 0.000630  loss: 3.0344 (3.0324)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1840 (1.2726)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [227]  [1000/2502]  eta: 0:08:25  lr: 0.000628  min_lr: 0.000628  loss: 3.0015 (3.0394)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2618 (1.2611)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [227]  [1200/2502]  eta: 0:07:17  lr: 0.000627  min_lr: 0.000627  loss: 3.1924 (3.0481)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1773 (1.2526)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [227]  [1400/2502]  eta: 0:06:09  lr: 0.000626  min_lr: 0.000626  loss: 3.1957 (3.0484)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1786 (inf)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [227]  [1600/2502]  eta: 0:05:02  lr: 0.000625  min_lr: 0.000625  loss: 3.1417 (3.0512)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2998 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [227]  [1800/2502]  eta: 0:03:55  lr: 0.000623  min_lr: 0.000623  loss: 3.2262 (3.0509)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2379 (inf)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [227]  [2000/2502]  eta: 0:02:48  lr: 0.000622  min_lr: 0.000622  loss: 3.1078 (3.0421)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1228 (inf)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [227]  [2200/2502]  eta: 0:01:41  lr: 0.000621  min_lr: 0.000621  loss: 3.2886 (3.0420)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1620 (inf)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [227]  [2400/2502]  eta: 0:00:34  lr: 0.000619  min_lr: 0.000619  loss: 3.3022 (3.0463)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1931 (inf)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [227]  [2501/2502]  eta: 0:00:00  lr: 0.000619  min_lr: 0.000619  loss: 3.0622 (3.0459)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1768 (inf)  time: 0.3000  data: 0.0009  max mem: 27255
Epoch: [227] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.000619  min_lr: 0.000619  loss: 3.0622 (3.0360)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1768 (inf)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.5706 (0.5706)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.0090  data: 2.8054  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8696 (0.8416)  acc1: 88.0000 (86.8000)  acc5: 98.4000 (98.0727)  time: 0.4430  data: 0.2754  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8696 (0.8637)  acc1: 84.4000 (86.0000)  acc5: 98.0000 (97.9238)  time: 0.1848  data: 0.0200  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9754 (0.9571)  acc1: 81.6000 (84.0258)  acc5: 96.8000 (96.6710)  time: 0.1850  data: 0.0202  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1944 (1.0142)  acc1: 78.4000 (82.5561)  acc5: 93.6000 (96.0488)  time: 0.2044  data: 0.0406  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1649 (1.0346)  acc1: 78.0000 (81.7280)  acc5: 94.8000 (95.9840)  time: 0.1996  data: 0.0355  max mem: 27255
Test: Total time: 0:00:12 (0.2496 s / it)
* Acc@1 81.748 Acc@5 96.084 loss 1.033
Accuracy of the model on the 50000 test images: 81.7%
Max accuracy: 81.75%
Epoch: [228]  [   0/2502]  eta: 1:31:45  lr: 0.000619  min_lr: 0.000619  loss: 3.6516 (3.6516)  weight_decay: 0.0500 (0.0500)  time: 2.2003  data: 1.8573  max mem: 27255
Epoch: [228]  [ 200/2502]  eta: 0:13:10  lr: 0.000617  min_lr: 0.000617  loss: 3.2859 (3.0665)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1853 (1.2112)  time: 0.3385  data: 0.0005  max mem: 27255
Epoch: [228]  [ 400/2502]  eta: 0:11:54  lr: 0.000616  min_lr: 0.000616  loss: 3.0530 (3.0498)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2622 (1.2442)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [228]  [ 600/2502]  eta: 0:10:42  lr: 0.000615  min_lr: 0.000615  loss: 3.2640 (3.0289)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2206 (1.2522)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [228]  [ 800/2502]  eta: 0:09:33  lr: 0.000614  min_lr: 0.000614  loss: 3.2835 (3.0323)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1685 (1.2454)  time: 0.3359  data: 0.0005  max mem: 27255
Epoch: [228]  [1000/2502]  eta: 0:08:25  lr: 0.000612  min_lr: 0.000612  loss: 3.1711 (3.0255)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2985 (1.2421)  time: 0.3335  data: 0.0003  max mem: 27255
Epoch: [228]  [1200/2502]  eta: 0:07:17  lr: 0.000611  min_lr: 0.000611  loss: 3.0302 (3.0169)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1753 (1.2338)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [228]  [1400/2502]  eta: 0:06:10  lr: 0.000610  min_lr: 0.000610  loss: 3.1288 (3.0242)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3016 (1.2362)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [228]  [1600/2502]  eta: 0:05:02  lr: 0.000608  min_lr: 0.000608  loss: 3.0549 (3.0192)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1748 (1.2353)  time: 0.3390  data: 0.0003  max mem: 27255
Epoch: [228]  [1800/2502]  eta: 0:03:55  lr: 0.000607  min_lr: 0.000607  loss: 2.9721 (3.0180)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2727 (1.2410)  time: 0.3394  data: 0.0004  max mem: 27255
Epoch: [228]  [2000/2502]  eta: 0:02:48  lr: 0.000606  min_lr: 0.000606  loss: 3.1019 (3.0222)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2758 (1.2384)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [228]  [2200/2502]  eta: 0:01:41  lr: 0.000605  min_lr: 0.000605  loss: 3.3786 (3.0233)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1966 (1.2418)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [228]  [2400/2502]  eta: 0:00:34  lr: 0.000603  min_lr: 0.000603  loss: 3.0719 (3.0220)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1623 (nan)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [228]  [2501/2502]  eta: 0:00:00  lr: 0.000603  min_lr: 0.000603  loss: 3.0732 (3.0228)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1420 (nan)  time: 0.2999  data: 0.0007  max mem: 27255
Epoch: [228] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.000603  min_lr: 0.000603  loss: 3.0732 (3.0216)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1420 (nan)
Test:  [ 0/50]  eta: 0:02:52  loss: 0.6257 (0.6257)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.4544  data: 3.2547  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.8982 (0.8698)  acc1: 86.0000 (86.6546)  acc5: 98.0000 (97.6727)  time: 0.4633  data: 0.2962  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8982 (0.8927)  acc1: 84.4000 (85.7333)  acc5: 98.0000 (97.7333)  time: 0.1643  data: 0.0004  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0626 (0.9886)  acc1: 81.6000 (83.7161)  acc5: 95.6000 (96.5677)  time: 0.1654  data: 0.0004  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2004 (1.0413)  acc1: 78.0000 (82.4195)  acc5: 94.4000 (96.0098)  time: 0.1657  data: 0.0003  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1919 (1.0594)  acc1: 77.6000 (81.8080)  acc5: 94.8000 (95.9520)  time: 0.1649  data: 0.0002  max mem: 27255
Test: Total time: 0:00:11 (0.2326 s / it)
* Acc@1 81.590 Acc@5 96.058 loss 1.059
Accuracy of the model on the 50000 test images: 81.6%
Max accuracy: 81.75%
Epoch: [229]  [   0/2502]  eta: 1:21:17  lr: 0.000603  min_lr: 0.000603  loss: 2.0484 (2.0484)  weight_decay: 0.0500 (0.0500)  time: 1.9495  data: 1.5217  max mem: 27255
Epoch: [229]  [ 200/2502]  eta: 0:13:06  lr: 0.000601  min_lr: 0.000601  loss: 3.2229 (2.9923)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2447 (1.2551)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [229]  [ 400/2502]  eta: 0:11:49  lr: 0.000600  min_lr: 0.000600  loss: 2.8573 (2.9954)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1166 (1.2429)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [229]  [ 600/2502]  eta: 0:10:40  lr: 0.000599  min_lr: 0.000599  loss: 3.1307 (2.9805)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2586 (1.2573)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [229]  [ 800/2502]  eta: 0:09:31  lr: 0.000597  min_lr: 0.000597  loss: 2.7834 (2.9890)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2314 (1.2492)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [229]  [1000/2502]  eta: 0:08:23  lr: 0.000596  min_lr: 0.000596  loss: 3.1757 (2.9965)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2523 (1.2523)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [229]  [1200/2502]  eta: 0:07:16  lr: 0.000595  min_lr: 0.000595  loss: 3.2105 (2.9949)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2450 (1.2612)  time: 0.3340  data: 0.0003  max mem: 27255
Epoch: [229]  [1400/2502]  eta: 0:06:09  lr: 0.000594  min_lr: 0.000594  loss: 3.1532 (2.9981)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2177 (1.2605)  time: 0.3340  data: 0.0003  max mem: 27255
Epoch: [229]  [1600/2502]  eta: 0:05:02  lr: 0.000592  min_lr: 0.000592  loss: 3.3102 (3.0009)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2584 (1.2630)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [229]  [1800/2502]  eta: 0:03:55  lr: 0.000591  min_lr: 0.000591  loss: 2.8859 (3.0054)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1677 (1.2609)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [229]  [2000/2502]  eta: 0:02:48  lr: 0.000590  min_lr: 0.000590  loss: 3.0270 (3.0052)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1302 (1.2552)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [229]  [2200/2502]  eta: 0:01:41  lr: 0.000589  min_lr: 0.000589  loss: 3.2008 (3.0097)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1346 (1.2512)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [229]  [2400/2502]  eta: 0:00:34  lr: 0.000587  min_lr: 0.000587  loss: 3.1215 (3.0106)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2004 (1.2477)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [229]  [2501/2502]  eta: 0:00:00  lr: 0.000587  min_lr: 0.000587  loss: 3.1922 (3.0120)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2079 (1.2469)  time: 0.2996  data: 0.0007  max mem: 27255
Epoch: [229] Total time: 0:13:56 (0.3345 s / it)
Averaged stats: lr: 0.000587  min_lr: 0.000587  loss: 3.1922 (3.0182)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2079 (1.2469)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.5969 (0.5969)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.0059  data: 2.8156  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.9093 (0.8921)  acc1: 88.4000 (87.3091)  acc5: 98.0000 (97.5273)  time: 0.4473  data: 0.2812  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.9144 (0.9201)  acc1: 84.4000 (85.9810)  acc5: 98.0000 (97.6952)  time: 0.2018  data: 0.0381  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0622 (1.0116)  acc1: 80.8000 (83.6645)  acc5: 96.4000 (96.5419)  time: 0.2135  data: 0.0499  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1890 (1.0609)  acc1: 78.8000 (82.4000)  acc5: 94.0000 (95.9610)  time: 0.2290  data: 0.0654  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1890 (1.0819)  acc1: 78.8000 (81.8720)  acc5: 94.0000 (95.8880)  time: 0.2032  data: 0.0398  max mem: 27255
Test: Total time: 0:00:13 (0.2635 s / it)
* Acc@1 81.710 Acc@5 95.990 loss 1.082
Accuracy of the model on the 50000 test images: 81.7%
Max accuracy: 81.75%
Epoch: [230]  [   0/2502]  eta: 1:34:18  lr: 0.000587  min_lr: 0.000587  loss: 2.1224 (2.1224)  weight_decay: 0.0500 (0.0500)  time: 2.2618  data: 1.4895  max mem: 27255
Epoch: [230]  [ 200/2502]  eta: 0:13:09  lr: 0.000585  min_lr: 0.000585  loss: 3.2412 (3.0558)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1570 (1.2252)  time: 0.3326  data: 0.0003  max mem: 27255
Epoch: [230]  [ 400/2502]  eta: 0:11:52  lr: 0.000584  min_lr: 0.000584  loss: 3.2206 (3.0487)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2221 (1.2432)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [230]  [ 600/2502]  eta: 0:10:41  lr: 0.000583  min_lr: 0.000583  loss: 3.2497 (3.0404)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2892 (1.2584)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [230]  [ 800/2502]  eta: 0:09:32  lr: 0.000582  min_lr: 0.000582  loss: 3.1877 (3.0317)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1779 (1.2737)  time: 0.3343  data: 0.0003  max mem: 27255
Epoch: [230]  [1000/2502]  eta: 0:08:24  lr: 0.000580  min_lr: 0.000580  loss: 3.3201 (3.0163)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2261 (1.2763)  time: 0.3348  data: 0.0003  max mem: 27255
Epoch: [230]  [1200/2502]  eta: 0:07:17  lr: 0.000579  min_lr: 0.000579  loss: 3.0345 (3.0054)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1847 (1.2699)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [230]  [1400/2502]  eta: 0:06:09  lr: 0.000578  min_lr: 0.000578  loss: 3.0591 (3.0062)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2194 (1.2661)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [230]  [1600/2502]  eta: 0:05:02  lr: 0.000577  min_lr: 0.000577  loss: 3.2835 (3.0133)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1943 (1.2688)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [230]  [1800/2502]  eta: 0:03:55  lr: 0.000575  min_lr: 0.000575  loss: 3.0316 (3.0027)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2034 (1.2629)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [230]  [2000/2502]  eta: 0:02:48  lr: 0.000574  min_lr: 0.000574  loss: 2.8105 (3.0016)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1919 (1.2585)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [230]  [2200/2502]  eta: 0:01:41  lr: 0.000573  min_lr: 0.000573  loss: 3.1192 (2.9988)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2628 (1.2596)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [230]  [2400/2502]  eta: 0:00:34  lr: 0.000571  min_lr: 0.000571  loss: 3.0587 (3.0023)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2263 (1.2616)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [230]  [2501/2502]  eta: 0:00:00  lr: 0.000571  min_lr: 0.000571  loss: 2.9512 (2.9988)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2588 (1.2640)  time: 0.3008  data: 0.0009  max mem: 27255
Epoch: [230] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.000571  min_lr: 0.000571  loss: 2.9512 (3.0050)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2588 (1.2640)
Test:  [ 0/50]  eta: 0:02:19  loss: 0.5459 (0.5459)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 2.7991  data: 2.6029  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7895 (0.8038)  acc1: 87.2000 (87.1273)  acc5: 98.0000 (97.7455)  time: 0.4540  data: 0.2800  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8271 (0.8307)  acc1: 84.0000 (85.5238)  acc5: 98.0000 (97.7524)  time: 0.1932  data: 0.0242  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9713 (0.9189)  acc1: 80.4000 (83.7032)  acc5: 95.6000 (96.5677)  time: 0.1865  data: 0.0214  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1452 (0.9693)  acc1: 78.0000 (82.3220)  acc5: 94.0000 (96.0976)  time: 0.2050  data: 0.0393  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0939 (0.9901)  acc1: 76.0000 (81.6880)  acc5: 94.4000 (95.9120)  time: 0.1951  data: 0.0295  max mem: 27255
Test: Total time: 0:00:12 (0.2484 s / it)
* Acc@1 81.732 Acc@5 96.120 loss 0.989
Accuracy of the model on the 50000 test images: 81.7%
Max accuracy: 81.75%
Epoch: [231]  [   0/2502]  eta: 1:35:55  lr: 0.000571  min_lr: 0.000571  loss: 3.3754 (3.3754)  weight_decay: 0.0500 (0.0500)  time: 2.3005  data: 1.9638  max mem: 27255
Epoch: [231]  [ 200/2502]  eta: 0:13:14  lr: 0.000570  min_lr: 0.000570  loss: 3.2145 (3.0609)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1527 (1.3190)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [231]  [ 400/2502]  eta: 0:11:54  lr: 0.000568  min_lr: 0.000568  loss: 3.0349 (3.0643)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3894 (1.3051)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [231]  [ 600/2502]  eta: 0:10:42  lr: 0.000567  min_lr: 0.000567  loss: 3.1555 (3.0414)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4206 (1.3171)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [231]  [ 800/2502]  eta: 0:09:33  lr: 0.000566  min_lr: 0.000566  loss: 3.0735 (3.0288)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1767 (1.3079)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [231]  [1000/2502]  eta: 0:08:25  lr: 0.000565  min_lr: 0.000565  loss: 3.1270 (3.0184)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1773 (1.2908)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [231]  [1200/2502]  eta: 0:07:17  lr: 0.000563  min_lr: 0.000563  loss: 3.0494 (3.0173)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2016 (1.2823)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [231]  [1400/2502]  eta: 0:06:10  lr: 0.000562  min_lr: 0.000562  loss: 3.2930 (3.0111)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2984 (1.2903)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [231]  [1600/2502]  eta: 0:05:02  lr: 0.000561  min_lr: 0.000561  loss: 3.1556 (3.0106)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2421 (1.2859)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [231]  [1800/2502]  eta: 0:03:55  lr: 0.000560  min_lr: 0.000560  loss: 3.1066 (3.0057)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2821 (1.2851)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [231]  [2000/2502]  eta: 0:02:48  lr: 0.000558  min_lr: 0.000558  loss: 3.0442 (3.0040)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1346 (1.2863)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [231]  [2200/2502]  eta: 0:01:41  lr: 0.000557  min_lr: 0.000557  loss: 3.0160 (2.9998)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2500 (1.2841)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [231]  [2400/2502]  eta: 0:00:34  lr: 0.000556  min_lr: 0.000556  loss: 2.9268 (2.9973)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1302 (1.2834)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [231]  [2501/2502]  eta: 0:00:00  lr: 0.000555  min_lr: 0.000555  loss: 3.2511 (3.0004)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2147 (1.2828)  time: 0.3000  data: 0.0009  max mem: 27255
Epoch: [231] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.000555  min_lr: 0.000555  loss: 3.2511 (2.9992)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2147 (1.2828)
Test:  [ 0/50]  eta: 0:02:21  loss: 0.5717 (0.5717)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 2.8380  data: 2.6424  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8816 (0.8575)  acc1: 88.0000 (86.5818)  acc5: 97.6000 (97.7818)  time: 0.4408  data: 0.2741  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8957 (0.8804)  acc1: 84.0000 (86.0381)  acc5: 97.6000 (97.7333)  time: 0.1832  data: 0.0195  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0058 (0.9722)  acc1: 82.0000 (83.8710)  acc5: 95.2000 (96.6581)  time: 0.1983  data: 0.0346  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1604 (1.0144)  acc1: 78.8000 (82.5854)  acc5: 94.4000 (96.1561)  time: 0.2358  data: 0.0722  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1546 (1.0317)  acc1: 78.0000 (81.9120)  acc5: 94.4000 (96.0400)  time: 0.2020  data: 0.0386  max mem: 27255
Test: Total time: 0:00:12 (0.2556 s / it)
* Acc@1 81.812 Acc@5 96.058 loss 1.030
Accuracy of the model on the 50000 test images: 81.8%
Max accuracy: 81.81%
Epoch: [232]  [   0/2502]  eta: 1:20:29  lr: 0.000555  min_lr: 0.000555  loss: 3.1794 (3.1794)  weight_decay: 0.0500 (0.0500)  time: 1.9302  data: 1.5907  max mem: 27255
Epoch: [232]  [ 200/2502]  eta: 0:13:05  lr: 0.000554  min_lr: 0.000554  loss: 3.1902 (2.9758)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2136 (1.2469)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [232]  [ 400/2502]  eta: 0:11:49  lr: 0.000553  min_lr: 0.000553  loss: 3.2298 (2.9857)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2603 (1.2795)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [232]  [ 600/2502]  eta: 0:10:40  lr: 0.000552  min_lr: 0.000552  loss: 2.7112 (2.9783)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2237 (1.2799)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [232]  [ 800/2502]  eta: 0:09:32  lr: 0.000550  min_lr: 0.000550  loss: 2.8760 (2.9672)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2450 (1.2800)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [232]  [1000/2502]  eta: 0:08:25  lr: 0.000549  min_lr: 0.000549  loss: 3.0175 (2.9821)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1455 (1.2717)  time: 0.3449  data: 0.0005  max mem: 27255
Epoch: [232]  [1200/2502]  eta: 0:07:17  lr: 0.000548  min_lr: 0.000548  loss: 3.2215 (2.9903)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2973 (1.2736)  time: 0.3335  data: 0.0003  max mem: 27255
Epoch: [232]  [1400/2502]  eta: 0:06:09  lr: 0.000547  min_lr: 0.000547  loss: 3.2039 (2.9916)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3240 (1.2769)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [232]  [1600/2502]  eta: 0:05:02  lr: 0.000545  min_lr: 0.000545  loss: 2.9432 (2.9963)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2495 (1.2819)  time: 0.3330  data: 0.0003  max mem: 27255
Epoch: [232]  [1800/2502]  eta: 0:03:55  lr: 0.000544  min_lr: 0.000544  loss: 2.9642 (2.9959)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2488 (1.2829)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [232]  [2000/2502]  eta: 0:02:48  lr: 0.000543  min_lr: 0.000543  loss: 3.0780 (2.9950)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3442 (1.2855)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [232]  [2200/2502]  eta: 0:01:41  lr: 0.000542  min_lr: 0.000542  loss: 3.1611 (2.9930)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3236 (1.2850)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [232]  [2400/2502]  eta: 0:00:34  lr: 0.000540  min_lr: 0.000540  loss: 3.0200 (2.9941)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2600 (1.2827)  time: 0.3325  data: 0.0003  max mem: 27255
Epoch: [232]  [2501/2502]  eta: 0:00:00  lr: 0.000540  min_lr: 0.000540  loss: 2.9648 (2.9911)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2950 (1.2838)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [232] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.000540  min_lr: 0.000540  loss: 2.9648 (2.9993)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2950 (1.2838)
Test:  [ 0/50]  eta: 0:02:10  loss: 0.5353 (0.5353)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 2.6014  data: 2.4028  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.7842 (0.7945)  acc1: 88.8000 (87.0545)  acc5: 98.4000 (97.8546)  time: 0.4202  data: 0.2531  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8406 (0.8226)  acc1: 85.2000 (86.2095)  acc5: 98.0000 (97.8667)  time: 0.2356  data: 0.0716  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9364 (0.9098)  acc1: 82.4000 (84.2065)  acc5: 95.6000 (96.6710)  time: 0.2319  data: 0.0680  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1268 (0.9589)  acc1: 78.4000 (82.8195)  acc5: 94.0000 (96.0976)  time: 0.2286  data: 0.0648  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1268 (0.9839)  acc1: 77.6000 (82.0000)  acc5: 94.4000 (95.9760)  time: 0.2132  data: 0.0495  max mem: 27255
Test: Total time: 0:00:13 (0.2689 s / it)
* Acc@1 81.938 Acc@5 96.176 loss 0.985
Accuracy of the model on the 50000 test images: 81.9%
Max accuracy: 81.94%
Epoch: [233]  [   0/2502]  eta: 1:28:09  lr: 0.000540  min_lr: 0.000540  loss: 3.2957 (3.2957)  weight_decay: 0.0500 (0.0500)  time: 2.1141  data: 1.7661  max mem: 27255
Epoch: [233]  [ 200/2502]  eta: 0:13:10  lr: 0.000539  min_lr: 0.000539  loss: 3.0310 (2.9659)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3139 (1.2867)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [233]  [ 400/2502]  eta: 0:11:53  lr: 0.000537  min_lr: 0.000537  loss: 3.0290 (2.9931)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2246 (1.2824)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [233]  [ 600/2502]  eta: 0:10:42  lr: 0.000536  min_lr: 0.000536  loss: 2.8960 (2.9815)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2998 (1.2978)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [233]  [ 800/2502]  eta: 0:09:33  lr: 0.000535  min_lr: 0.000535  loss: 3.2110 (3.0007)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1697 (1.2816)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [233]  [1000/2502]  eta: 0:08:25  lr: 0.000534  min_lr: 0.000534  loss: 3.1024 (3.0048)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4120 (1.2958)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [233]  [1200/2502]  eta: 0:07:17  lr: 0.000533  min_lr: 0.000533  loss: 3.0840 (3.0160)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3713 (1.2946)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [233]  [1400/2502]  eta: 0:06:09  lr: 0.000531  min_lr: 0.000531  loss: 2.8955 (3.0163)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1735 (1.2901)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [233]  [1600/2502]  eta: 0:05:02  lr: 0.000530  min_lr: 0.000530  loss: 3.1097 (3.0179)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1599 (1.2854)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [233]  [1800/2502]  eta: 0:03:55  lr: 0.000529  min_lr: 0.000529  loss: 3.0001 (3.0192)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2178 (1.2811)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [233]  [2000/2502]  eta: 0:02:48  lr: 0.000528  min_lr: 0.000528  loss: 3.1665 (3.0153)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2208 (1.2813)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [233]  [2200/2502]  eta: 0:01:41  lr: 0.000526  min_lr: 0.000526  loss: 2.9632 (3.0208)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2633 (1.2861)  time: 0.3372  data: 0.0005  max mem: 27255
Epoch: [233]  [2400/2502]  eta: 0:00:34  lr: 0.000525  min_lr: 0.000525  loss: 2.9369 (3.0203)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2784 (1.2864)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [233]  [2501/2502]  eta: 0:00:00  lr: 0.000525  min_lr: 0.000525  loss: 2.9045 (3.0192)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3269 (1.2888)  time: 0.3011  data: 0.0009  max mem: 27255
Epoch: [233] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.000525  min_lr: 0.000525  loss: 2.9045 (3.0023)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3269 (1.2888)
Test:  [ 0/50]  eta: 0:02:43  loss: 0.4515 (0.4515)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 3.2615  data: 3.0628  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7266 (0.7325)  acc1: 88.8000 (87.0545)  acc5: 98.8000 (97.9273)  time: 0.4464  data: 0.2788  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7634 (0.7657)  acc1: 85.2000 (86.1524)  acc5: 98.0000 (97.9048)  time: 0.1646  data: 0.0004  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9162 (0.8601)  acc1: 82.4000 (84.0774)  acc5: 96.4000 (96.6323)  time: 0.1644  data: 0.0004  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0694 (0.9086)  acc1: 78.0000 (82.4488)  acc5: 93.6000 (96.1366)  time: 0.1748  data: 0.0109  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0423 (0.9307)  acc1: 78.0000 (81.7840)  acc5: 94.8000 (96.0640)  time: 0.1747  data: 0.0109  max mem: 27255
Test: Total time: 0:00:11 (0.2322 s / it)
* Acc@1 81.960 Acc@5 96.136 loss 0.922
Accuracy of the model on the 50000 test images: 82.0%
Max accuracy: 81.96%
Epoch: [234]  [   0/2502]  eta: 1:15:03  lr: 0.000525  min_lr: 0.000525  loss: 3.4757 (3.4757)  weight_decay: 0.0500 (0.0500)  time: 1.7999  data: 1.4596  max mem: 27255
Epoch: [234]  [ 200/2502]  eta: 0:13:04  lr: 0.000523  min_lr: 0.000523  loss: 3.1854 (3.0306)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1863 (1.2624)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [234]  [ 400/2502]  eta: 0:11:50  lr: 0.000522  min_lr: 0.000522  loss: 3.1171 (3.0148)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2660 (1.2639)  time: 0.3421  data: 0.0004  max mem: 27255
Epoch: [234]  [ 600/2502]  eta: 0:10:40  lr: 0.000521  min_lr: 0.000521  loss: 2.5945 (2.9999)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2299 (1.2637)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [234]  [ 800/2502]  eta: 0:09:31  lr: 0.000520  min_lr: 0.000520  loss: 2.9313 (2.9965)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2365 (1.2716)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [234]  [1000/2502]  eta: 0:08:23  lr: 0.000519  min_lr: 0.000519  loss: 3.0155 (2.9907)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2914 (1.2694)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [234]  [1200/2502]  eta: 0:07:16  lr: 0.000517  min_lr: 0.000517  loss: 2.8153 (2.9880)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2699 (1.2860)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [234]  [1400/2502]  eta: 0:06:09  lr: 0.000516  min_lr: 0.000516  loss: 3.0981 (2.9951)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1912 (1.2914)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [234]  [1600/2502]  eta: 0:05:02  lr: 0.000515  min_lr: 0.000515  loss: 3.0153 (2.9939)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2603 (1.2852)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [234]  [1800/2502]  eta: 0:03:55  lr: 0.000514  min_lr: 0.000514  loss: 3.1952 (2.9939)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2871 (1.2960)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [234]  [2000/2502]  eta: 0:02:48  lr: 0.000513  min_lr: 0.000513  loss: 2.7307 (2.9932)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2753 (1.2948)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [234]  [2200/2502]  eta: 0:01:41  lr: 0.000511  min_lr: 0.000511  loss: 2.8289 (2.9951)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2833 (1.2990)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [234]  [2400/2502]  eta: 0:00:34  lr: 0.000510  min_lr: 0.000510  loss: 3.0675 (2.9909)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2934 (1.2988)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [234]  [2501/2502]  eta: 0:00:00  lr: 0.000510  min_lr: 0.000510  loss: 3.0413 (2.9944)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2632 (1.2974)  time: 0.3010  data: 0.0009  max mem: 27255
Epoch: [234] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.000510  min_lr: 0.000510  loss: 3.0413 (2.9878)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2632 (1.2974)
Test:  [ 0/50]  eta: 0:02:34  loss: 0.5966 (0.5966)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 3.0930  data: 2.8960  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 0.8199 (0.8160)  acc1: 87.6000 (87.2364)  acc5: 98.4000 (97.7818)  time: 0.5185  data: 0.3505  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8372 (0.8496)  acc1: 83.6000 (86.0000)  acc5: 98.0000 (97.7333)  time: 0.2219  data: 0.0559  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0016 (0.9408)  acc1: 82.4000 (84.2323)  acc5: 95.6000 (96.4774)  time: 0.1884  data: 0.0230  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1556 (0.9933)  acc1: 78.8000 (82.8293)  acc5: 93.6000 (95.9805)  time: 0.1949  data: 0.0311  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1497 (1.0162)  acc1: 77.6000 (82.0160)  acc5: 94.0000 (95.8560)  time: 0.1890  data: 0.0252  max mem: 27255
Test: Total time: 0:00:13 (0.2635 s / it)
* Acc@1 82.150 Acc@5 96.158 loss 1.014
Accuracy of the model on the 50000 test images: 82.2%
Max accuracy: 82.15%
Epoch: [235]  [   0/2502]  eta: 1:14:07  lr: 0.000510  min_lr: 0.000510  loss: 2.2168 (2.2168)  weight_decay: 0.0500 (0.0500)  time: 1.7777  data: 1.4198  max mem: 27255
Epoch: [235]  [ 200/2502]  eta: 0:13:08  lr: 0.000508  min_lr: 0.000508  loss: 2.9635 (2.9523)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3191 (1.3329)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [235]  [ 400/2502]  eta: 0:11:51  lr: 0.000507  min_lr: 0.000507  loss: 2.7959 (2.9738)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3876 (1.3319)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [235]  [ 600/2502]  eta: 0:10:41  lr: 0.000506  min_lr: 0.000506  loss: 3.2617 (2.9747)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3230 (1.3322)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [235]  [ 800/2502]  eta: 0:09:33  lr: 0.000505  min_lr: 0.000505  loss: 3.1747 (2.9818)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3288 (1.3309)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [235]  [1000/2502]  eta: 0:08:25  lr: 0.000504  min_lr: 0.000504  loss: 2.7733 (2.9862)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2096 (1.3252)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [235]  [1200/2502]  eta: 0:07:17  lr: 0.000502  min_lr: 0.000502  loss: 3.2364 (2.9854)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3437 (inf)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [235]  [1400/2502]  eta: 0:06:10  lr: 0.000501  min_lr: 0.000501  loss: 2.9060 (2.9868)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2989 (inf)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [235]  [1600/2502]  eta: 0:05:03  lr: 0.000500  min_lr: 0.000500  loss: 2.9978 (2.9854)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1589 (inf)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [235]  [1800/2502]  eta: 0:03:55  lr: 0.000499  min_lr: 0.000499  loss: 3.1574 (2.9812)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3547 (inf)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [235]  [2000/2502]  eta: 0:02:48  lr: 0.000498  min_lr: 0.000498  loss: 2.7548 (2.9800)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3775 (inf)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [235]  [2200/2502]  eta: 0:01:41  lr: 0.000497  min_lr: 0.000497  loss: 2.7628 (2.9754)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [235]  [2400/2502]  eta: 0:00:34  lr: 0.000495  min_lr: 0.000495  loss: 2.9978 (2.9724)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3262 (nan)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [235]  [2501/2502]  eta: 0:00:00  lr: 0.000495  min_lr: 0.000495  loss: 3.0046 (2.9702)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2821 (nan)  time: 0.2998  data: 0.0007  max mem: 27255
Epoch: [235] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.000495  min_lr: 0.000495  loss: 3.0046 (2.9823)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2821 (nan)
Test:  [ 0/50]  eta: 0:02:38  loss: 0.5347 (0.5347)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 3.1601  data: 2.9643  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8327 (0.8370)  acc1: 88.4000 (87.1273)  acc5: 98.0000 (98.0000)  time: 0.4368  data: 0.2700  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8753 (0.8629)  acc1: 84.0000 (86.4191)  acc5: 98.0000 (97.9810)  time: 0.1645  data: 0.0005  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9939 (0.9595)  acc1: 82.4000 (84.1032)  acc5: 96.8000 (96.6581)  time: 0.1840  data: 0.0201  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1566 (1.0048)  acc1: 78.4000 (82.7707)  acc5: 94.0000 (96.2342)  time: 0.2303  data: 0.0664  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1566 (1.0242)  acc1: 77.6000 (82.0240)  acc5: 95.2000 (96.2320)  time: 0.2105  data: 0.0467  max mem: 27255
Test: Total time: 0:00:12 (0.2527 s / it)
* Acc@1 82.154 Acc@5 96.268 loss 1.018
Accuracy of the model on the 50000 test images: 82.2%
Max accuracy: 82.15%
Epoch: [236]  [   0/2502]  eta: 1:20:21  lr: 0.000495  min_lr: 0.000495  loss: 2.1159 (2.1159)  weight_decay: 0.0500 (0.0500)  time: 1.9271  data: 1.5711  max mem: 27255
Epoch: [236]  [ 200/2502]  eta: 0:13:08  lr: 0.000494  min_lr: 0.000494  loss: 3.0446 (2.9679)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4675 (1.8564)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [236]  [ 400/2502]  eta: 0:11:52  lr: 0.000492  min_lr: 0.000492  loss: 3.1509 (2.9487)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2979 (1.5950)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [236]  [ 600/2502]  eta: 0:10:41  lr: 0.000491  min_lr: 0.000491  loss: 2.7162 (2.9523)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2670 (1.5020)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [236]  [ 800/2502]  eta: 0:09:33  lr: 0.000490  min_lr: 0.000490  loss: 3.1739 (2.9637)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3338 (1.4631)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [236]  [1000/2502]  eta: 0:08:25  lr: 0.000489  min_lr: 0.000489  loss: 2.9080 (2.9610)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3298 (1.4375)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [236]  [1200/2502]  eta: 0:07:17  lr: 0.000488  min_lr: 0.000488  loss: 3.1243 (2.9595)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3053 (1.4094)  time: 0.3450  data: 0.0004  max mem: 27255
Epoch: [236]  [1400/2502]  eta: 0:06:10  lr: 0.000486  min_lr: 0.000486  loss: 3.0116 (2.9596)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2572 (1.3941)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [236]  [1600/2502]  eta: 0:05:02  lr: 0.000485  min_lr: 0.000485  loss: 3.0933 (2.9605)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2932 (inf)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [236]  [1800/2502]  eta: 0:03:55  lr: 0.000484  min_lr: 0.000484  loss: 2.9925 (2.9618)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3014 (inf)  time: 0.3368  data: 0.0005  max mem: 27255
Epoch: [236]  [2000/2502]  eta: 0:02:48  lr: 0.000483  min_lr: 0.000483  loss: 2.9702 (2.9635)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3197 (inf)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [236]  [2200/2502]  eta: 0:01:41  lr: 0.000482  min_lr: 0.000482  loss: 2.9526 (2.9658)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1791 (inf)  time: 0.3360  data: 0.0004  max mem: 27255
Epoch: [236]  [2400/2502]  eta: 0:00:34  lr: 0.000481  min_lr: 0.000481  loss: 3.0721 (2.9645)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2734 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [236]  [2501/2502]  eta: 0:00:00  lr: 0.000480  min_lr: 0.000480  loss: 3.1943 (2.9664)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2998 (inf)  time: 0.3004  data: 0.0007  max mem: 27255
Epoch: [236] Total time: 0:13:59 (0.3356 s / it)
Averaged stats: lr: 0.000480  min_lr: 0.000480  loss: 3.1943 (2.9665)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2998 (inf)
Test:  [ 0/50]  eta: 0:02:15  loss: 0.6085 (0.6085)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 2.7101  data: 2.5165  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8787 (0.8651)  acc1: 87.2000 (87.0546)  acc5: 98.4000 (97.9636)  time: 0.4095  data: 0.2429  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9008 (0.8933)  acc1: 84.4000 (86.0191)  acc5: 98.0000 (97.8667)  time: 0.1950  data: 0.0307  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0673 (0.9931)  acc1: 81.6000 (84.0000)  acc5: 96.4000 (96.8387)  time: 0.1954  data: 0.0312  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2042 (1.0509)  acc1: 78.4000 (82.4683)  acc5: 94.0000 (96.1951)  time: 0.2057  data: 0.0419  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2035 (1.0721)  acc1: 78.4000 (81.7760)  acc5: 94.8000 (96.1280)  time: 0.1991  data: 0.0354  max mem: 27255
Test: Total time: 0:00:12 (0.2465 s / it)
* Acc@1 82.144 Acc@5 96.156 loss 1.067
Accuracy of the model on the 50000 test images: 82.1%
Max accuracy: 82.15%
Epoch: [237]  [   0/2502]  eta: 1:32:49  lr: 0.000480  min_lr: 0.000480  loss: 3.3871 (3.3871)  weight_decay: 0.0500 (0.0500)  time: 2.2260  data: 1.8363  max mem: 27255
Epoch: [237]  [ 200/2502]  eta: 0:13:13  lr: 0.000479  min_lr: 0.000479  loss: 2.9552 (2.9425)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3624 (1.3157)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [237]  [ 400/2502]  eta: 0:11:53  lr: 0.000478  min_lr: 0.000478  loss: 3.0680 (2.9612)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2948 (1.3361)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [237]  [ 600/2502]  eta: 0:10:42  lr: 0.000477  min_lr: 0.000477  loss: 3.1555 (2.9605)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2936 (1.3376)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [237]  [ 800/2502]  eta: 0:09:33  lr: 0.000475  min_lr: 0.000475  loss: 2.8798 (2.9626)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2755 (1.3356)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [237]  [1000/2502]  eta: 0:08:25  lr: 0.000474  min_lr: 0.000474  loss: 2.8597 (2.9565)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2936 (1.3450)  time: 0.3412  data: 0.0004  max mem: 27255
Epoch: [237]  [1200/2502]  eta: 0:07:17  lr: 0.000473  min_lr: 0.000473  loss: 3.1676 (2.9598)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2362 (1.3408)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [237]  [1400/2502]  eta: 0:06:10  lr: 0.000472  min_lr: 0.000472  loss: 3.1541 (2.9537)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2815 (1.3360)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [237]  [1600/2502]  eta: 0:05:02  lr: 0.000471  min_lr: 0.000471  loss: 3.1555 (2.9627)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3904 (1.3401)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [237]  [1800/2502]  eta: 0:03:55  lr: 0.000470  min_lr: 0.000470  loss: 2.8386 (2.9632)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3264 (1.3332)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [237]  [2000/2502]  eta: 0:02:48  lr: 0.000468  min_lr: 0.000468  loss: 2.8637 (2.9628)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2928 (1.3320)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [237]  [2200/2502]  eta: 0:01:41  lr: 0.000467  min_lr: 0.000467  loss: 2.9495 (2.9570)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2921 (1.3337)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [237]  [2400/2502]  eta: 0:00:34  lr: 0.000466  min_lr: 0.000466  loss: 3.1642 (2.9653)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3953 (1.3388)  time: 0.3366  data: 0.0005  max mem: 27255
Epoch: [237]  [2501/2502]  eta: 0:00:00  lr: 0.000466  min_lr: 0.000466  loss: 2.6892 (2.9656)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3338 (1.3386)  time: 0.3006  data: 0.0009  max mem: 27255
Epoch: [237] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000466  min_lr: 0.000466  loss: 2.6892 (2.9616)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3338 (1.3386)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.4471 (0.4471)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 2.8129  data: 2.6231  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.7353 (0.7439)  acc1: 88.8000 (87.1273)  acc5: 98.0000 (97.6727)  time: 0.4197  data: 0.2534  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7471 (0.7756)  acc1: 84.0000 (86.1714)  acc5: 97.6000 (97.7524)  time: 0.2014  data: 0.0375  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9215 (0.8664)  acc1: 82.4000 (84.0903)  acc5: 95.2000 (96.5290)  time: 0.2239  data: 0.0599  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0605 (0.9133)  acc1: 78.4000 (82.8488)  acc5: 94.0000 (96.0000)  time: 0.2291  data: 0.0652  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0605 (0.9335)  acc1: 78.4000 (82.0800)  acc5: 94.4000 (96.0080)  time: 0.1999  data: 0.0362  max mem: 27255
Test: Total time: 0:00:13 (0.2606 s / it)
* Acc@1 82.246 Acc@5 96.164 loss 0.927
Accuracy of the model on the 50000 test images: 82.2%
Max accuracy: 82.25%
Epoch: [238]  [   0/2502]  eta: 1:29:32  lr: 0.000466  min_lr: 0.000466  loss: 2.4291 (2.4291)  weight_decay: 0.0500 (0.0500)  time: 2.1475  data: 1.7968  max mem: 27255
Epoch: [238]  [ 200/2502]  eta: 0:13:12  lr: 0.000464  min_lr: 0.000464  loss: 3.0060 (2.9407)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3152 (1.3203)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [238]  [ 400/2502]  eta: 0:11:54  lr: 0.000463  min_lr: 0.000463  loss: 3.0861 (2.9351)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3770 (1.3327)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [238]  [ 600/2502]  eta: 0:10:42  lr: 0.000462  min_lr: 0.000462  loss: 3.0194 (2.9487)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3377 (1.3531)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [238]  [ 800/2502]  eta: 0:09:33  lr: 0.000461  min_lr: 0.000461  loss: 3.0200 (2.9525)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2969 (1.3353)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [238]  [1000/2502]  eta: 0:08:24  lr: 0.000460  min_lr: 0.000460  loss: 3.0405 (2.9520)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3159 (1.3336)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [238]  [1200/2502]  eta: 0:07:17  lr: 0.000459  min_lr: 0.000459  loss: 3.2805 (2.9539)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3973 (1.3469)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [238]  [1400/2502]  eta: 0:06:09  lr: 0.000458  min_lr: 0.000458  loss: 3.1706 (2.9633)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4037 (1.3528)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [238]  [1600/2502]  eta: 0:05:02  lr: 0.000456  min_lr: 0.000456  loss: 3.0795 (2.9629)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3116 (1.3618)  time: 0.3369  data: 0.0005  max mem: 27255
Epoch: [238]  [1800/2502]  eta: 0:03:55  lr: 0.000455  min_lr: 0.000455  loss: 3.1323 (2.9712)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2831 (1.3602)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [238]  [2000/2502]  eta: 0:02:48  lr: 0.000454  min_lr: 0.000454  loss: 2.8215 (2.9675)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3583 (1.3656)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [238]  [2200/2502]  eta: 0:01:41  lr: 0.000453  min_lr: 0.000453  loss: 2.9301 (2.9682)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3290 (1.3664)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [238]  [2400/2502]  eta: 0:00:34  lr: 0.000452  min_lr: 0.000452  loss: 3.0787 (2.9670)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2820 (1.3650)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [238]  [2501/2502]  eta: 0:00:00  lr: 0.000451  min_lr: 0.000451  loss: 2.9761 (2.9679)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2988 (1.3619)  time: 0.3012  data: 0.0009  max mem: 27255
Epoch: [238] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.000451  min_lr: 0.000451  loss: 2.9761 (2.9634)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2988 (1.3619)
Test:  [ 0/50]  eta: 0:02:09  loss: 0.5778 (0.5778)  acc1: 93.6000 (93.6000)  acc5: 99.2000 (99.2000)  time: 2.5995  data: 2.4079  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8259 (0.8303)  acc1: 88.4000 (86.7636)  acc5: 98.4000 (98.1091)  time: 0.4187  data: 0.2522  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8391 (0.8631)  acc1: 84.4000 (86.0191)  acc5: 98.0000 (98.0952)  time: 0.2052  data: 0.0412  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0599 (0.9660)  acc1: 81.2000 (83.8710)  acc5: 96.0000 (96.8774)  time: 0.2042  data: 0.0403  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1797 (1.0183)  acc1: 78.8000 (82.5854)  acc5: 94.4000 (96.3415)  time: 0.1948  data: 0.0304  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1797 (1.0383)  acc1: 78.4000 (82.0320)  acc5: 94.8000 (96.2400)  time: 0.1820  data: 0.0177  max mem: 27255
Test: Total time: 0:00:12 (0.2431 s / it)
* Acc@1 82.200 Acc@5 96.242 loss 1.029
Accuracy of the model on the 50000 test images: 82.2%
Max accuracy: 82.25%
Epoch: [239]  [   0/2502]  eta: 1:21:42  lr: 0.000451  min_lr: 0.000451  loss: 3.1476 (3.1476)  weight_decay: 0.0500 (0.0500)  time: 1.9596  data: 1.3082  max mem: 27255
Epoch: [239]  [ 200/2502]  eta: 0:13:10  lr: 0.000450  min_lr: 0.000450  loss: 2.8701 (2.9295)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4158 (1.4697)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [239]  [ 400/2502]  eta: 0:11:51  lr: 0.000449  min_lr: 0.000449  loss: 3.0617 (2.9212)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2803 (1.4102)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [239]  [ 600/2502]  eta: 0:10:40  lr: 0.000448  min_lr: 0.000448  loss: 3.1509 (2.9354)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3734 (1.3828)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [239]  [ 800/2502]  eta: 0:09:32  lr: 0.000447  min_lr: 0.000447  loss: 3.0675 (2.9432)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.1815 (1.3640)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [239]  [1000/2502]  eta: 0:08:24  lr: 0.000446  min_lr: 0.000446  loss: 2.9970 (2.9444)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4075 (1.3737)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [239]  [1200/2502]  eta: 0:07:17  lr: 0.000445  min_lr: 0.000445  loss: 2.9734 (2.9460)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2515 (1.3687)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [239]  [1400/2502]  eta: 0:06:09  lr: 0.000443  min_lr: 0.000443  loss: 3.1755 (2.9460)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3353 (1.3703)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [239]  [1600/2502]  eta: 0:05:02  lr: 0.000442  min_lr: 0.000442  loss: 2.8862 (2.9501)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4018 (1.3745)  time: 0.3418  data: 0.0004  max mem: 27255
Epoch: [239]  [1800/2502]  eta: 0:03:55  lr: 0.000441  min_lr: 0.000441  loss: 2.9425 (2.9476)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2593 (1.3677)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [239]  [2000/2502]  eta: 0:02:48  lr: 0.000440  min_lr: 0.000440  loss: 3.2427 (2.9538)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3260 (1.3665)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [239]  [2200/2502]  eta: 0:01:41  lr: 0.000439  min_lr: 0.000439  loss: 3.0470 (2.9595)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4253 (1.3713)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [239]  [2400/2502]  eta: 0:00:34  lr: 0.000438  min_lr: 0.000438  loss: 2.9729 (2.9624)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2917 (1.3683)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [239]  [2501/2502]  eta: 0:00:00  lr: 0.000437  min_lr: 0.000437  loss: 2.9458 (2.9638)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4566 (1.3775)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [239] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000437  min_lr: 0.000437  loss: 2.9458 (2.9570)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4566 (1.3775)
Test:  [ 0/50]  eta: 0:03:08  loss: 0.5342 (0.5342)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.7612  data: 3.5650  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7608 (0.7853)  acc1: 89.2000 (87.7455)  acc5: 98.8000 (98.0727)  time: 0.4914  data: 0.3244  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.7928 (0.8122)  acc1: 84.0000 (86.4191)  acc5: 98.0000 (97.8857)  time: 0.2185  data: 0.0544  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9910 (0.9063)  acc1: 81.6000 (84.0774)  acc5: 96.0000 (96.7871)  time: 0.2572  data: 0.0933  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0854 (0.9588)  acc1: 78.4000 (82.5366)  acc5: 94.0000 (96.2829)  time: 0.2047  data: 0.0411  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0731 (0.9806)  acc1: 77.6000 (81.8400)  acc5: 95.2000 (96.1840)  time: 0.2044  data: 0.0409  max mem: 27255
Test: Total time: 0:00:13 (0.2758 s / it)
* Acc@1 82.080 Acc@5 96.236 loss 0.973
Accuracy of the model on the 50000 test images: 82.1%
Max accuracy: 82.25%
Epoch: [240]  [   0/2502]  eta: 1:30:15  lr: 0.000437  min_lr: 0.000437  loss: 3.0011 (3.0011)  weight_decay: 0.0500 (0.0500)  time: 2.1644  data: 1.7014  max mem: 27255
Epoch: [240]  [ 200/2502]  eta: 0:13:09  lr: 0.000436  min_lr: 0.000436  loss: 3.0516 (2.9505)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2584 (1.3409)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [240]  [ 400/2502]  eta: 0:11:51  lr: 0.000435  min_lr: 0.000435  loss: 3.1530 (2.9606)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3332 (1.3309)  time: 0.3413  data: 0.0004  max mem: 27255
Epoch: [240]  [ 600/2502]  eta: 0:10:41  lr: 0.000434  min_lr: 0.000434  loss: 2.8174 (2.9517)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3546 (1.3357)  time: 0.3353  data: 0.0005  max mem: 27255
Epoch: [240]  [ 800/2502]  eta: 0:09:32  lr: 0.000433  min_lr: 0.000433  loss: 2.9536 (2.9534)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2858 (1.3345)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [240]  [1000/2502]  eta: 0:08:24  lr: 0.000432  min_lr: 0.000432  loss: 3.1133 (2.9650)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3741 (1.3313)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [240]  [1200/2502]  eta: 0:07:17  lr: 0.000431  min_lr: 0.000431  loss: 2.8739 (2.9607)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2857 (1.3323)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [240]  [1400/2502]  eta: 0:06:09  lr: 0.000429  min_lr: 0.000429  loss: 3.0818 (2.9568)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2921 (1.3381)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [240]  [1600/2502]  eta: 0:05:02  lr: 0.000428  min_lr: 0.000428  loss: 3.0642 (2.9545)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3701 (1.3472)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [240]  [1800/2502]  eta: 0:03:55  lr: 0.000427  min_lr: 0.000427  loss: 3.0312 (2.9527)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2963 (1.3472)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [240]  [2000/2502]  eta: 0:02:48  lr: 0.000426  min_lr: 0.000426  loss: 3.0914 (2.9492)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3192 (1.3492)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [240]  [2200/2502]  eta: 0:01:41  lr: 0.000425  min_lr: 0.000425  loss: 3.1592 (2.9512)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3508 (1.3556)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [240]  [2400/2502]  eta: 0:00:34  lr: 0.000424  min_lr: 0.000424  loss: 3.0665 (2.9520)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3324 (1.3540)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [240]  [2501/2502]  eta: 0:00:00  lr: 0.000423  min_lr: 0.000423  loss: 3.0644 (2.9532)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3615 (1.3538)  time: 0.2999  data: 0.0009  max mem: 27255
Epoch: [240] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.000423  min_lr: 0.000423  loss: 3.0644 (2.9523)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3615 (1.3538)
Test:  [ 0/50]  eta: 0:02:31  loss: 0.6027 (0.6027)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.0221  data: 2.8277  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8700 (0.8487)  acc1: 86.8000 (86.6182)  acc5: 98.0000 (97.7091)  time: 0.4341  data: 0.2675  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.9000 (0.8846)  acc1: 84.8000 (85.7905)  acc5: 97.6000 (97.6762)  time: 0.1800  data: 0.0161  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0177 (0.9788)  acc1: 81.6000 (83.6645)  acc5: 96.4000 (96.6968)  time: 0.1777  data: 0.0138  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2133 (1.0336)  acc1: 78.4000 (82.3707)  acc5: 94.4000 (96.2342)  time: 0.1876  data: 0.0238  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2133 (1.0515)  acc1: 78.4000 (81.7280)  acc5: 95.2000 (96.1760)  time: 0.1870  data: 0.0230  max mem: 27255
Test: Total time: 0:00:12 (0.2413 s / it)
* Acc@1 82.198 Acc@5 96.288 loss 1.045
Accuracy of the model on the 50000 test images: 82.2%
Max accuracy: 82.25%
Epoch: [241]  [   0/2502]  eta: 2:05:58  lr: 0.000423  min_lr: 0.000423  loss: 2.2688 (2.2688)  weight_decay: 0.0500 (0.0500)  time: 3.0212  data: 2.3323  max mem: 27255
Epoch: [241]  [ 200/2502]  eta: 0:13:26  lr: 0.000422  min_lr: 0.000422  loss: 3.0944 (2.9096)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3434 (1.3633)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [241]  [ 400/2502]  eta: 0:11:59  lr: 0.000421  min_lr: 0.000421  loss: 3.0866 (2.9470)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3795 (inf)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [241]  [ 600/2502]  eta: 0:10:46  lr: 0.000420  min_lr: 0.000420  loss: 3.1676 (2.9392)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2488 (inf)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [241]  [ 800/2502]  eta: 0:09:36  lr: 0.000419  min_lr: 0.000419  loss: 3.0868 (2.9414)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3821 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [241]  [1000/2502]  eta: 0:08:27  lr: 0.000418  min_lr: 0.000418  loss: 3.1001 (2.9287)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3079 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [241]  [1200/2502]  eta: 0:07:18  lr: 0.000417  min_lr: 0.000417  loss: 3.1035 (2.9330)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2900 (inf)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [241]  [1400/2502]  eta: 0:06:11  lr: 0.000416  min_lr: 0.000416  loss: 3.1361 (2.9381)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3590 (inf)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [241]  [1600/2502]  eta: 0:05:03  lr: 0.000415  min_lr: 0.000415  loss: 3.0612 (2.9411)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4865 (inf)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [241]  [1800/2502]  eta: 0:03:56  lr: 0.000413  min_lr: 0.000413  loss: 2.9867 (2.9421)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3177 (inf)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [241]  [2000/2502]  eta: 0:02:48  lr: 0.000412  min_lr: 0.000412  loss: 3.1197 (2.9465)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2612 (inf)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [241]  [2200/2502]  eta: 0:01:41  lr: 0.000411  min_lr: 0.000411  loss: 3.0146 (2.9477)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2874 (inf)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [241]  [2400/2502]  eta: 0:00:34  lr: 0.000410  min_lr: 0.000410  loss: 3.2889 (2.9472)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3145 (inf)  time: 0.3356  data: 0.0004  max mem: 27255
Epoch: [241]  [2501/2502]  eta: 0:00:00  lr: 0.000410  min_lr: 0.000410  loss: 3.0515 (2.9489)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3388 (inf)  time: 0.3003  data: 0.0009  max mem: 27255
Epoch: [241] Total time: 0:14:00 (0.3360 s / it)
Averaged stats: lr: 0.000410  min_lr: 0.000410  loss: 3.0515 (2.9476)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3388 (inf)
Test:  [ 0/50]  eta: 0:02:30  loss: 0.5490 (0.5490)  acc1: 94.8000 (94.8000)  acc5: 98.8000 (98.8000)  time: 3.0186  data: 2.8253  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 0.7891 (0.8119)  acc1: 88.4000 (87.4545)  acc5: 97.6000 (97.7455)  time: 0.5046  data: 0.3381  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8488 (0.8468)  acc1: 84.4000 (86.0762)  acc5: 97.6000 (97.7333)  time: 0.2252  data: 0.0615  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0070 (0.9305)  acc1: 82.0000 (84.3742)  acc5: 96.4000 (96.6710)  time: 0.1878  data: 0.0241  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1066 (0.9775)  acc1: 79.6000 (82.9463)  acc5: 94.8000 (96.2146)  time: 0.1772  data: 0.0137  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1260 (0.9967)  acc1: 79.6000 (82.3120)  acc5: 94.8000 (96.0480)  time: 0.1770  data: 0.0135  max mem: 27255
Test: Total time: 0:00:12 (0.2529 s / it)
* Acc@1 82.384 Acc@5 96.200 loss 0.992
Accuracy of the model on the 50000 test images: 82.4%
Max accuracy: 82.38%
Epoch: [242]  [   0/2502]  eta: 1:27:03  lr: 0.000410  min_lr: 0.000410  loss: 3.4091 (3.4091)  weight_decay: 0.0500 (0.0500)  time: 2.0876  data: 1.7289  max mem: 27255
Epoch: [242]  [ 200/2502]  eta: 0:13:09  lr: 0.000409  min_lr: 0.000409  loss: 3.1311 (2.9447)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3454 (1.3492)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [242]  [ 400/2502]  eta: 0:11:50  lr: 0.000407  min_lr: 0.000407  loss: 3.0631 (2.9444)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3581 (1.3610)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [242]  [ 600/2502]  eta: 0:10:41  lr: 0.000406  min_lr: 0.000406  loss: 2.9266 (2.9378)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3263 (1.3609)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [242]  [ 800/2502]  eta: 0:09:32  lr: 0.000405  min_lr: 0.000405  loss: 3.0832 (2.9248)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2967 (1.3621)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [242]  [1000/2502]  eta: 0:08:24  lr: 0.000404  min_lr: 0.000404  loss: 3.0862 (2.9311)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3067 (1.3750)  time: 0.3331  data: 0.0005  max mem: 27255
Epoch: [242]  [1200/2502]  eta: 0:07:17  lr: 0.000403  min_lr: 0.000403  loss: 2.9984 (2.9375)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4449 (1.3873)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [242]  [1400/2502]  eta: 0:06:09  lr: 0.000402  min_lr: 0.000402  loss: 3.0920 (2.9431)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2614 (1.3872)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [242]  [1600/2502]  eta: 0:05:02  lr: 0.000401  min_lr: 0.000401  loss: 3.0672 (2.9431)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5035 (1.3950)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [242]  [1800/2502]  eta: 0:03:55  lr: 0.000400  min_lr: 0.000400  loss: 3.0234 (2.9406)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3449 (1.3934)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [242]  [2000/2502]  eta: 0:02:48  lr: 0.000399  min_lr: 0.000399  loss: 3.0130 (2.9381)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3519 (1.3921)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [242]  [2200/2502]  eta: 0:01:41  lr: 0.000398  min_lr: 0.000398  loss: 3.0781 (2.9385)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3753 (1.3947)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [242]  [2400/2502]  eta: 0:00:34  lr: 0.000397  min_lr: 0.000397  loss: 3.1017 (2.9362)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3019 (1.3902)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [242]  [2501/2502]  eta: 0:00:00  lr: 0.000396  min_lr: 0.000396  loss: 3.0418 (2.9377)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4273 (1.3943)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [242] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.000396  min_lr: 0.000396  loss: 3.0418 (2.9328)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4273 (1.3943)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.6135 (0.6135)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 2.8147  data: 2.6080  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8567 (0.8661)  acc1: 87.2000 (87.1273)  acc5: 98.4000 (97.8909)  time: 0.4089  data: 0.2410  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8691 (0.8916)  acc1: 84.0000 (86.1524)  acc5: 97.6000 (97.7714)  time: 0.1966  data: 0.0328  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0685 (0.9875)  acc1: 81.6000 (84.3355)  acc5: 95.6000 (96.7742)  time: 0.2403  data: 0.0761  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.2005 (1.0415)  acc1: 79.2000 (82.8781)  acc5: 94.8000 (96.2927)  time: 0.2319  data: 0.0676  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.2005 (1.0616)  acc1: 78.0000 (82.2160)  acc5: 94.8000 (96.2240)  time: 0.1859  data: 0.0224  max mem: 27255
Test: Total time: 0:00:12 (0.2589 s / it)
* Acc@1 82.412 Acc@5 96.364 loss 1.057
Accuracy of the model on the 50000 test images: 82.4%
Max accuracy: 82.41%
Epoch: [243]  [   0/2502]  eta: 1:13:33  lr: 0.000396  min_lr: 0.000396  loss: 2.4458 (2.4458)  weight_decay: 0.0500 (0.0500)  time: 1.7640  data: 1.4263  max mem: 27255
Epoch: [243]  [ 200/2502]  eta: 0:13:05  lr: 0.000395  min_lr: 0.000395  loss: 2.9139 (2.9361)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4248 (1.5394)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [243]  [ 400/2502]  eta: 0:11:52  lr: 0.000394  min_lr: 0.000394  loss: 3.0670 (2.9345)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3917 (1.4825)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [243]  [ 600/2502]  eta: 0:10:41  lr: 0.000393  min_lr: 0.000393  loss: 2.8809 (2.9359)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2785 (1.4383)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [243]  [ 800/2502]  eta: 0:09:32  lr: 0.000392  min_lr: 0.000392  loss: 3.2520 (2.9521)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4294 (1.4327)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [243]  [1000/2502]  eta: 0:08:24  lr: 0.000391  min_lr: 0.000391  loss: 3.0835 (2.9565)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3871 (1.4234)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [243]  [1200/2502]  eta: 0:07:16  lr: 0.000390  min_lr: 0.000390  loss: 3.0064 (2.9593)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3891 (1.4262)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [243]  [1400/2502]  eta: 0:06:09  lr: 0.000389  min_lr: 0.000389  loss: 3.0849 (2.9642)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3661 (1.4236)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [243]  [1600/2502]  eta: 0:05:02  lr: 0.000388  min_lr: 0.000388  loss: 3.1861 (2.9584)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3686 (1.4211)  time: 0.3419  data: 0.0004  max mem: 27255
Epoch: [243]  [1800/2502]  eta: 0:03:55  lr: 0.000387  min_lr: 0.000387  loss: 2.8285 (2.9563)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3253 (1.4173)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [243]  [2000/2502]  eta: 0:02:48  lr: 0.000386  min_lr: 0.000386  loss: 3.0631 (2.9480)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4672 (1.4160)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [243]  [2200/2502]  eta: 0:01:41  lr: 0.000384  min_lr: 0.000384  loss: 3.0771 (2.9460)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3215 (1.4201)  time: 0.3425  data: 0.0004  max mem: 27255
Epoch: [243]  [2400/2502]  eta: 0:00:34  lr: 0.000383  min_lr: 0.000383  loss: 3.1917 (2.9492)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3566 (1.4185)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [243]  [2501/2502]  eta: 0:00:00  lr: 0.000383  min_lr: 0.000383  loss: 3.0951 (2.9480)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3413 (1.4158)  time: 0.3015  data: 0.0011  max mem: 27255
Epoch: [243] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.000383  min_lr: 0.000383  loss: 3.0951 (2.9307)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3413 (1.4158)
Test:  [ 0/50]  eta: 0:02:14  loss: 0.5699 (0.5699)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 2.6831  data: 2.4679  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7828 (0.7897)  acc1: 89.2000 (88.1091)  acc5: 98.4000 (98.0364)  time: 0.4734  data: 0.3022  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8105 (0.8181)  acc1: 84.4000 (86.5143)  acc5: 98.0000 (97.8286)  time: 0.2443  data: 0.0790  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9737 (0.9150)  acc1: 81.2000 (84.2968)  acc5: 95.6000 (96.7484)  time: 0.2189  data: 0.0552  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1215 (0.9652)  acc1: 78.8000 (82.9463)  acc5: 94.4000 (96.2927)  time: 0.2010  data: 0.0373  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1215 (0.9834)  acc1: 78.8000 (82.4320)  acc5: 95.2000 (96.2080)  time: 0.1824  data: 0.0189  max mem: 27255
Test: Total time: 0:00:13 (0.2634 s / it)
* Acc@1 82.432 Acc@5 96.344 loss 0.978
Accuracy of the model on the 50000 test images: 82.4%
Max accuracy: 82.43%
Epoch: [244]  [   0/2502]  eta: 1:16:09  lr: 0.000383  min_lr: 0.000383  loss: 3.1242 (3.1242)  weight_decay: 0.0500 (0.0500)  time: 1.8262  data: 1.4894  max mem: 27255
Epoch: [244]  [ 200/2502]  eta: 0:13:07  lr: 0.000382  min_lr: 0.000382  loss: 2.9111 (2.9104)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3649 (1.4166)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [244]  [ 400/2502]  eta: 0:11:51  lr: 0.000381  min_lr: 0.000381  loss: 2.8805 (2.8962)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4582 (1.4270)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [244]  [ 600/2502]  eta: 0:10:41  lr: 0.000380  min_lr: 0.000380  loss: 3.0569 (2.9122)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3487 (1.4099)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [244]  [ 800/2502]  eta: 0:09:32  lr: 0.000379  min_lr: 0.000379  loss: 3.0552 (2.9161)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3917 (1.4070)  time: 0.3405  data: 0.0004  max mem: 27255
Epoch: [244]  [1000/2502]  eta: 0:08:24  lr: 0.000378  min_lr: 0.000378  loss: 3.0272 (2.9157)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3138 (1.4071)  time: 0.3359  data: 0.0004  max mem: 27255
Epoch: [244]  [1200/2502]  eta: 0:07:17  lr: 0.000377  min_lr: 0.000377  loss: 2.9735 (2.9175)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4100 (1.4142)  time: 0.3412  data: 0.0005  max mem: 27255
Epoch: [244]  [1400/2502]  eta: 0:06:09  lr: 0.000376  min_lr: 0.000376  loss: 2.8194 (2.9133)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3644 (1.4088)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [244]  [1600/2502]  eta: 0:05:02  lr: 0.000374  min_lr: 0.000374  loss: 3.0925 (2.9206)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3568 (1.4128)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [244]  [1800/2502]  eta: 0:03:55  lr: 0.000373  min_lr: 0.000373  loss: 3.0804 (2.9224)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3745 (1.4188)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [244]  [2000/2502]  eta: 0:02:48  lr: 0.000372  min_lr: 0.000372  loss: 3.1090 (2.9201)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4580 (1.4240)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [244]  [2200/2502]  eta: 0:01:41  lr: 0.000371  min_lr: 0.000371  loss: 2.9078 (2.9232)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4630 (1.4241)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [244]  [2400/2502]  eta: 0:00:34  lr: 0.000370  min_lr: 0.000370  loss: 3.1250 (2.9192)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3421 (1.4212)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [244]  [2501/2502]  eta: 0:00:00  lr: 0.000370  min_lr: 0.000370  loss: 2.9212 (2.9187)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4090 (1.4209)  time: 0.3012  data: 0.0009  max mem: 27255
Epoch: [244] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.000370  min_lr: 0.000370  loss: 2.9212 (2.9268)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4090 (1.4209)
Test:  [ 0/50]  eta: 0:02:51  loss: 0.5228 (0.5228)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.4206  data: 3.2146  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7454 (0.7557)  acc1: 88.8000 (87.5273)  acc5: 98.8000 (98.1818)  time: 0.4606  data: 0.2926  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7700 (0.7904)  acc1: 84.4000 (86.3238)  acc5: 98.4000 (98.0952)  time: 0.1970  data: 0.0329  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9748 (0.8897)  acc1: 82.4000 (84.4387)  acc5: 96.0000 (96.7871)  time: 0.2006  data: 0.0367  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1065 (0.9413)  acc1: 79.2000 (83.1024)  acc5: 94.8000 (96.3707)  time: 0.1761  data: 0.0122  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0927 (0.9624)  acc1: 78.4000 (82.5040)  acc5: 94.8000 (96.3040)  time: 0.1758  data: 0.0121  max mem: 27255
Test: Total time: 0:00:12 (0.2492 s / it)
* Acc@1 82.574 Acc@5 96.460 loss 0.957
Accuracy of the model on the 50000 test images: 82.6%
Max accuracy: 82.57%
Epoch: [245]  [   0/2502]  eta: 1:33:07  lr: 0.000370  min_lr: 0.000370  loss: 2.8534 (2.8534)  weight_decay: 0.0500 (0.0500)  time: 2.2333  data: 1.8858  max mem: 27255
Epoch: [245]  [ 200/2502]  eta: 0:13:12  lr: 0.000369  min_lr: 0.000369  loss: 2.7998 (2.9298)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3708 (1.4189)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [245]  [ 400/2502]  eta: 0:11:52  lr: 0.000368  min_lr: 0.000368  loss: 3.0673 (2.9052)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3855 (1.4101)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [245]  [ 600/2502]  eta: 0:10:41  lr: 0.000367  min_lr: 0.000367  loss: 2.7547 (2.9046)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4200 (1.4319)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [245]  [ 800/2502]  eta: 0:09:33  lr: 0.000366  min_lr: 0.000366  loss: 2.8384 (2.9135)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4423 (1.4346)  time: 0.3464  data: 0.0008  max mem: 27255
Epoch: [245]  [1000/2502]  eta: 0:08:25  lr: 0.000365  min_lr: 0.000365  loss: 3.0118 (2.9079)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4961 (1.4401)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [245]  [1200/2502]  eta: 0:07:17  lr: 0.000364  min_lr: 0.000364  loss: 3.0393 (2.9036)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5021 (1.4296)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [245]  [1400/2502]  eta: 0:06:10  lr: 0.000363  min_lr: 0.000363  loss: 2.7451 (2.9085)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3810 (1.4236)  time: 0.3331  data: 0.0005  max mem: 27255
Epoch: [245]  [1600/2502]  eta: 0:05:02  lr: 0.000362  min_lr: 0.000362  loss: 2.9312 (2.9064)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5598 (1.4275)  time: 0.3328  data: 0.0005  max mem: 27255
Epoch: [245]  [1800/2502]  eta: 0:03:55  lr: 0.000360  min_lr: 0.000360  loss: 3.1199 (2.9060)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3158 (1.4271)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [245]  [2000/2502]  eta: 0:02:48  lr: 0.000359  min_lr: 0.000359  loss: 2.8432 (2.9017)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4272 (1.4288)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [245]  [2200/2502]  eta: 0:01:41  lr: 0.000358  min_lr: 0.000358  loss: 3.1340 (2.9082)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4492 (1.4241)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [245]  [2400/2502]  eta: 0:00:34  lr: 0.000357  min_lr: 0.000357  loss: 3.1272 (2.9095)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3782 (1.4221)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [245]  [2501/2502]  eta: 0:00:00  lr: 0.000357  min_lr: 0.000357  loss: 3.1038 (2.9108)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5144 (1.4261)  time: 0.2996  data: 0.0007  max mem: 27255
Epoch: [245] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.000357  min_lr: 0.000357  loss: 3.1038 (2.9190)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5144 (1.4261)
Test:  [ 0/50]  eta: 0:02:07  loss: 0.5802 (0.5802)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 2.5561  data: 2.3617  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8429 (0.8402)  acc1: 88.4000 (86.9455)  acc5: 98.4000 (97.9273)  time: 0.4450  data: 0.2775  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8592 (0.8707)  acc1: 84.0000 (86.0381)  acc5: 97.6000 (97.9048)  time: 0.2194  data: 0.0539  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0383 (0.9647)  acc1: 81.2000 (83.8581)  acc5: 96.4000 (96.8774)  time: 0.2292  data: 0.0635  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1575 (1.0123)  acc1: 78.8000 (82.4878)  acc5: 94.8000 (96.3707)  time: 0.2404  data: 0.0759  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1494 (1.0294)  acc1: 78.8000 (82.0160)  acc5: 95.2000 (96.2960)  time: 0.1955  data: 0.0318  max mem: 27255
Test: Total time: 0:00:13 (0.2667 s / it)
* Acc@1 82.354 Acc@5 96.362 loss 1.022
Accuracy of the model on the 50000 test images: 82.4%
Max accuracy: 82.57%
Epoch: [246]  [   0/2502]  eta: 1:32:51  lr: 0.000357  min_lr: 0.000357  loss: 3.2128 (3.2128)  weight_decay: 0.0500 (0.0500)  time: 2.2269  data: 1.4830  max mem: 27255
Epoch: [246]  [ 200/2502]  eta: 0:13:12  lr: 0.000356  min_lr: 0.000356  loss: 2.7108 (2.9186)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4985 (1.4472)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [246]  [ 400/2502]  eta: 0:11:52  lr: 0.000355  min_lr: 0.000355  loss: 2.8958 (2.9330)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3554 (1.4389)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [246]  [ 600/2502]  eta: 0:10:41  lr: 0.000354  min_lr: 0.000354  loss: 2.8377 (2.9243)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3519 (1.4207)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [246]  [ 800/2502]  eta: 0:09:32  lr: 0.000353  min_lr: 0.000353  loss: 3.1304 (2.9138)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2809 (1.4023)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [246]  [1000/2502]  eta: 0:08:25  lr: 0.000352  min_lr: 0.000352  loss: 2.9645 (2.9095)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3542 (1.4061)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [246]  [1200/2502]  eta: 0:07:17  lr: 0.000351  min_lr: 0.000351  loss: 3.0334 (2.9038)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4018 (1.4081)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [246]  [1400/2502]  eta: 0:06:09  lr: 0.000350  min_lr: 0.000350  loss: 2.7613 (2.9046)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4638 (1.4097)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [246]  [1600/2502]  eta: 0:05:02  lr: 0.000349  min_lr: 0.000349  loss: 3.0961 (2.9018)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3560 (1.4100)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [246]  [1800/2502]  eta: 0:03:55  lr: 0.000348  min_lr: 0.000348  loss: 3.0337 (2.9024)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3807 (1.4069)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [246]  [2000/2502]  eta: 0:02:48  lr: 0.000347  min_lr: 0.000347  loss: 2.9121 (2.9004)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4758 (1.4082)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [246]  [2200/2502]  eta: 0:01:41  lr: 0.000346  min_lr: 0.000346  loss: 3.1799 (2.9056)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4201 (1.4132)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [246]  [2400/2502]  eta: 0:00:34  lr: 0.000345  min_lr: 0.000345  loss: 3.0592 (2.9061)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3927 (1.4147)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [246]  [2501/2502]  eta: 0:00:00  lr: 0.000344  min_lr: 0.000344  loss: 2.8915 (2.9068)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3872 (1.4144)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [246] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.000344  min_lr: 0.000344  loss: 2.8915 (2.9092)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3872 (1.4144)
Test:  [ 0/50]  eta: 0:03:10  loss: 0.4855 (0.4855)  acc1: 95.2000 (95.2000)  acc5: 99.2000 (99.2000)  time: 3.8164  data: 3.6151  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7440 (0.7406)  acc1: 88.4000 (87.6000)  acc5: 98.8000 (98.1455)  time: 0.4971  data: 0.3291  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7623 (0.7700)  acc1: 84.8000 (86.5143)  acc5: 98.0000 (98.0571)  time: 0.1772  data: 0.0128  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9029 (0.8599)  acc1: 82.8000 (84.6065)  acc5: 96.4000 (96.8645)  time: 0.1879  data: 0.0239  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0735 (0.9068)  acc1: 79.6000 (83.1610)  acc5: 94.4000 (96.4390)  time: 0.1882  data: 0.0238  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0516 (0.9223)  acc1: 78.4000 (82.6720)  acc5: 94.8000 (96.3200)  time: 0.1880  data: 0.0236  max mem: 27255
Test: Total time: 0:00:12 (0.2537 s / it)
* Acc@1 82.758 Acc@5 96.374 loss 0.916
Accuracy of the model on the 50000 test images: 82.8%
Max accuracy: 82.76%
Epoch: [247]  [   0/2502]  eta: 1:27:46  lr: 0.000344  min_lr: 0.000344  loss: 2.8606 (2.8606)  weight_decay: 0.0500 (0.0500)  time: 2.1049  data: 1.7567  max mem: 27255
Epoch: [247]  [ 200/2502]  eta: 0:13:10  lr: 0.000343  min_lr: 0.000343  loss: 2.8786 (2.9215)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3704 (1.5229)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [247]  [ 400/2502]  eta: 0:11:51  lr: 0.000342  min_lr: 0.000342  loss: 3.0840 (2.9160)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5286 (1.5124)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [247]  [ 600/2502]  eta: 0:10:41  lr: 0.000341  min_lr: 0.000341  loss: 2.7249 (2.9024)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4530 (1.4918)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [247]  [ 800/2502]  eta: 0:09:33  lr: 0.000340  min_lr: 0.000340  loss: 2.9125 (2.9100)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3994 (1.4768)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [247]  [1000/2502]  eta: 0:08:24  lr: 0.000339  min_lr: 0.000339  loss: 3.0076 (2.9055)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4187 (1.4735)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [247]  [1200/2502]  eta: 0:07:17  lr: 0.000338  min_lr: 0.000338  loss: 3.1091 (2.9087)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4062 (1.4647)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [247]  [1400/2502]  eta: 0:06:09  lr: 0.000337  min_lr: 0.000337  loss: 2.9048 (2.9007)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2778 (1.4523)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [247]  [1600/2502]  eta: 0:05:02  lr: 0.000336  min_lr: 0.000336  loss: 3.1651 (2.9089)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4257 (1.4506)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [247]  [1800/2502]  eta: 0:03:55  lr: 0.000335  min_lr: 0.000335  loss: 2.9623 (2.9040)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4434 (1.4480)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [247]  [2000/2502]  eta: 0:02:48  lr: 0.000334  min_lr: 0.000334  loss: 2.9798 (2.9057)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4049 (1.4496)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [247]  [2200/2502]  eta: 0:01:41  lr: 0.000333  min_lr: 0.000333  loss: 2.9991 (2.9068)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3033 (1.4457)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [247]  [2400/2502]  eta: 0:00:34  lr: 0.000332  min_lr: 0.000332  loss: 3.0025 (2.9076)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4169 (1.4519)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [247]  [2501/2502]  eta: 0:00:00  lr: 0.000332  min_lr: 0.000332  loss: 3.0364 (2.9104)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3959 (1.4521)  time: 0.2999  data: 0.0007  max mem: 27255
Epoch: [247] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.000332  min_lr: 0.000332  loss: 3.0364 (2.9069)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3959 (1.4521)
Test:  [ 0/50]  eta: 0:02:10  loss: 0.5273 (0.5273)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 2.6103  data: 2.4178  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7973 (0.7943)  acc1: 88.8000 (88.1091)  acc5: 98.4000 (97.9636)  time: 0.4401  data: 0.2736  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8243 (0.8214)  acc1: 84.8000 (86.8762)  acc5: 98.0000 (97.9048)  time: 0.2313  data: 0.0672  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9978 (0.9182)  acc1: 82.4000 (84.7742)  acc5: 97.2000 (96.8645)  time: 0.2294  data: 0.0654  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1427 (0.9703)  acc1: 79.2000 (83.3463)  acc5: 94.4000 (96.4098)  time: 0.2227  data: 0.0589  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1325 (0.9879)  acc1: 79.2000 (82.7520)  acc5: 95.2000 (96.3520)  time: 0.1948  data: 0.0312  max mem: 27255
Test: Total time: 0:00:13 (0.2651 s / it)
* Acc@1 82.710 Acc@5 96.414 loss 0.983
Accuracy of the model on the 50000 test images: 82.7%
Max accuracy: 82.76%
Epoch: [248]  [   0/2502]  eta: 1:21:17  lr: 0.000332  min_lr: 0.000332  loss: 3.1930 (3.1930)  weight_decay: 0.0500 (0.0500)  time: 1.9493  data: 1.5508  max mem: 27255
Epoch: [248]  [ 200/2502]  eta: 0:13:06  lr: 0.000331  min_lr: 0.000331  loss: 2.7747 (2.8522)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3611 (1.3952)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [248]  [ 400/2502]  eta: 0:11:50  lr: 0.000330  min_lr: 0.000330  loss: 2.9186 (2.8870)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3637 (1.3886)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [248]  [ 600/2502]  eta: 0:10:42  lr: 0.000329  min_lr: 0.000329  loss: 2.9659 (2.8776)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4157 (1.4012)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [248]  [ 800/2502]  eta: 0:09:32  lr: 0.000328  min_lr: 0.000328  loss: 2.8656 (2.8770)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3512 (1.4278)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [248]  [1000/2502]  eta: 0:08:24  lr: 0.000327  min_lr: 0.000327  loss: 3.0538 (2.8851)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4449 (1.4435)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [248]  [1200/2502]  eta: 0:07:16  lr: 0.000326  min_lr: 0.000326  loss: 3.1240 (2.8881)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3521 (1.4363)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [248]  [1400/2502]  eta: 0:06:09  lr: 0.000325  min_lr: 0.000325  loss: 3.0111 (2.8863)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4656 (1.4428)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [248]  [1600/2502]  eta: 0:05:02  lr: 0.000324  min_lr: 0.000324  loss: 2.9844 (2.8839)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5003 (1.4464)  time: 0.3366  data: 0.0005  max mem: 27255
Epoch: [248]  [1800/2502]  eta: 0:03:55  lr: 0.000323  min_lr: 0.000323  loss: 2.7659 (2.8906)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3239 (1.4487)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [248]  [2000/2502]  eta: 0:02:48  lr: 0.000322  min_lr: 0.000322  loss: 2.8520 (2.8920)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4018 (1.4525)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [248]  [2200/2502]  eta: 0:01:41  lr: 0.000321  min_lr: 0.000321  loss: 3.2213 (2.8936)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4049 (1.4505)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [248]  [2400/2502]  eta: 0:00:34  lr: 0.000320  min_lr: 0.000320  loss: 2.8313 (2.8944)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3704 (1.4490)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [248]  [2501/2502]  eta: 0:00:00  lr: 0.000320  min_lr: 0.000320  loss: 2.9664 (2.8939)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4453 (1.4497)  time: 0.3001  data: 0.0009  max mem: 27255
Epoch: [248] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.000320  min_lr: 0.000320  loss: 2.9664 (2.8988)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4453 (1.4497)
Test:  [ 0/50]  eta: 0:02:15  loss: 0.5277 (0.5277)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 2.7043  data: 2.5068  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8257 (0.8041)  acc1: 88.0000 (87.7455)  acc5: 98.4000 (98.0364)  time: 0.4183  data: 0.2513  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8257 (0.8313)  acc1: 84.4000 (86.7429)  acc5: 98.4000 (97.9619)  time: 0.2352  data: 0.0707  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9872 (0.9307)  acc1: 82.4000 (84.4516)  acc5: 96.4000 (96.8387)  time: 0.2484  data: 0.0840  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1518 (0.9782)  acc1: 79.2000 (83.1610)  acc5: 94.4000 (96.3512)  time: 0.2031  data: 0.0393  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1095 (0.9996)  acc1: 78.8000 (82.4880)  acc5: 95.2000 (96.3120)  time: 0.1769  data: 0.0132  max mem: 27255
Test: Total time: 0:00:13 (0.2611 s / it)
* Acc@1 82.824 Acc@5 96.348 loss 0.992
Accuracy of the model on the 50000 test images: 82.8%
Max accuracy: 82.82%
Epoch: [249]  [   0/2502]  eta: 1:12:31  lr: 0.000320  min_lr: 0.000320  loss: 3.3486 (3.3486)  weight_decay: 0.0500 (0.0500)  time: 1.7391  data: 1.4001  max mem: 27255
Epoch: [249]  [ 200/2502]  eta: 0:13:07  lr: 0.000319  min_lr: 0.000319  loss: 2.7565 (2.8387)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4217 (1.4235)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [249]  [ 400/2502]  eta: 0:11:50  lr: 0.000318  min_lr: 0.000318  loss: 2.6374 (2.8461)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3346 (1.4201)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [249]  [ 600/2502]  eta: 0:10:40  lr: 0.000317  min_lr: 0.000317  loss: 2.9689 (2.8650)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.2932 (1.4288)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [249]  [ 800/2502]  eta: 0:09:32  lr: 0.000316  min_lr: 0.000316  loss: 3.0710 (2.8692)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6384 (1.4574)  time: 0.3416  data: 0.0003  max mem: 27255
Epoch: [249]  [1000/2502]  eta: 0:08:24  lr: 0.000315  min_lr: 0.000315  loss: 3.0395 (2.8845)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4643 (1.4698)  time: 0.3359  data: 0.0004  max mem: 27255
Epoch: [249]  [1200/2502]  eta: 0:07:16  lr: 0.000314  min_lr: 0.000314  loss: 2.9483 (2.8939)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4885 (1.4653)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [249]  [1400/2502]  eta: 0:06:09  lr: 0.000313  min_lr: 0.000313  loss: 2.8171 (2.8897)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5633 (1.4691)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [249]  [1600/2502]  eta: 0:05:02  lr: 0.000312  min_lr: 0.000312  loss: 3.0187 (2.8912)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4675 (1.4739)  time: 0.3364  data: 0.0005  max mem: 27255
Epoch: [249]  [1800/2502]  eta: 0:03:55  lr: 0.000311  min_lr: 0.000311  loss: 3.1541 (2.8920)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4538 (1.4809)  time: 0.3343  data: 0.0003  max mem: 27255
Epoch: [249]  [2000/2502]  eta: 0:02:48  lr: 0.000310  min_lr: 0.000310  loss: 3.0887 (2.8959)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4075 (1.4824)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [249]  [2200/2502]  eta: 0:01:41  lr: 0.000309  min_lr: 0.000309  loss: 2.8874 (2.8950)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3775 (1.4830)  time: 0.3405  data: 0.0003  max mem: 27255
Epoch: [249]  [2400/2502]  eta: 0:00:34  lr: 0.000308  min_lr: 0.000308  loss: 2.9473 (2.8949)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5400 (1.4909)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [249]  [2501/2502]  eta: 0:00:00  lr: 0.000307  min_lr: 0.000307  loss: 2.9769 (2.8945)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3052 (1.4889)  time: 0.3006  data: 0.0008  max mem: 27255
Epoch: [249] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.000307  min_lr: 0.000307  loss: 2.9769 (2.9002)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3052 (1.4889)
Test:  [ 0/50]  eta: 0:02:42  loss: 0.5374 (0.5374)  acc1: 94.8000 (94.8000)  acc5: 98.8000 (98.8000)  time: 3.2416  data: 3.0422  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8122 (0.7902)  acc1: 90.0000 (87.9273)  acc5: 98.4000 (98.1091)  time: 0.4441  data: 0.2769  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8139 (0.8169)  acc1: 84.8000 (86.9143)  acc5: 98.0000 (98.0381)  time: 0.1913  data: 0.0272  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9356 (0.9098)  acc1: 83.2000 (84.8903)  acc5: 96.8000 (96.9548)  time: 0.1985  data: 0.0346  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1128 (0.9614)  acc1: 80.0000 (83.3756)  acc5: 94.4000 (96.4585)  time: 0.1790  data: 0.0153  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0877 (0.9829)  acc1: 78.0000 (82.7440)  acc5: 95.6000 (96.4320)  time: 0.1922  data: 0.0267  max mem: 27255
Test: Total time: 0:00:12 (0.2499 s / it)
* Acc@1 82.768 Acc@5 96.458 loss 0.975
Accuracy of the model on the 50000 test images: 82.8%
Max accuracy: 82.82%
Epoch: [250]  [   0/2502]  eta: 1:32:56  lr: 0.000307  min_lr: 0.000307  loss: 3.0244 (3.0244)  weight_decay: 0.0500 (0.0500)  time: 2.2289  data: 1.6973  max mem: 27255
Epoch: [250]  [ 200/2502]  eta: 0:13:11  lr: 0.000307  min_lr: 0.000307  loss: 2.9679 (2.8631)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4371 (1.4867)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [250]  [ 400/2502]  eta: 0:11:52  lr: 0.000306  min_lr: 0.000306  loss: 2.8787 (2.8574)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3414 (1.4658)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [250]  [ 600/2502]  eta: 0:10:41  lr: 0.000305  min_lr: 0.000305  loss: 2.8971 (2.8632)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3713 (1.4732)  time: 0.3360  data: 0.0005  max mem: 27255
Epoch: [250]  [ 800/2502]  eta: 0:09:33  lr: 0.000304  min_lr: 0.000304  loss: 3.0082 (2.8725)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5394 (1.4838)  time: 0.3340  data: 0.0003  max mem: 27255
Epoch: [250]  [1000/2502]  eta: 0:08:25  lr: 0.000303  min_lr: 0.000303  loss: 3.1139 (2.8802)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3359 (1.4823)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [250]  [1200/2502]  eta: 0:07:17  lr: 0.000302  min_lr: 0.000302  loss: 2.8648 (2.8875)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3693 (1.4758)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [250]  [1400/2502]  eta: 0:06:10  lr: 0.000301  min_lr: 0.000301  loss: 3.0246 (2.8815)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4072 (1.4687)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [250]  [1600/2502]  eta: 0:05:02  lr: 0.000300  min_lr: 0.000300  loss: 2.7519 (2.8754)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4205 (1.4709)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [250]  [1800/2502]  eta: 0:03:55  lr: 0.000299  min_lr: 0.000299  loss: 2.9531 (2.8798)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4358 (inf)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [250]  [2000/2502]  eta: 0:02:48  lr: 0.000298  min_lr: 0.000298  loss: 2.9328 (2.8843)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4798 (inf)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [250]  [2200/2502]  eta: 0:01:41  lr: 0.000297  min_lr: 0.000297  loss: 2.9091 (2.8887)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4524 (inf)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [250]  [2400/2502]  eta: 0:00:34  lr: 0.000296  min_lr: 0.000296  loss: 3.0740 (2.8857)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5226 (inf)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [250]  [2501/2502]  eta: 0:00:00  lr: 0.000296  min_lr: 0.000296  loss: 2.9492 (2.8868)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5583 (inf)  time: 0.3018  data: 0.0009  max mem: 27255
Epoch: [250] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.000296  min_lr: 0.000296  loss: 2.9492 (2.8894)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5583 (inf)
Test:  [ 0/50]  eta: 0:02:16  loss: 0.5685 (0.5685)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.7337  data: 2.5418  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8180 (0.8352)  acc1: 89.2000 (88.0000)  acc5: 98.4000 (98.0364)  time: 0.4076  data: 0.2405  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8480 (0.8523)  acc1: 84.4000 (86.8000)  acc5: 98.0000 (97.9238)  time: 0.2134  data: 0.0493  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9857 (0.9465)  acc1: 83.2000 (84.8903)  acc5: 96.0000 (96.6452)  time: 0.2554  data: 0.0918  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1455 (0.9966)  acc1: 79.2000 (83.5317)  acc5: 94.0000 (96.1561)  time: 0.2113  data: 0.0477  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1291 (1.0178)  acc1: 78.8000 (82.8880)  acc5: 94.8000 (96.0800)  time: 0.1701  data: 0.0066  max mem: 27255
Test: Total time: 0:00:12 (0.2590 s / it)
* Acc@1 82.736 Acc@5 96.328 loss 1.012
Accuracy of the model on the 50000 test images: 82.7%
Max accuracy: 82.82%
Epoch: [251]  [   0/2502]  eta: 1:33:52  lr: 0.000296  min_lr: 0.000296  loss: 3.3433 (3.3433)  weight_decay: 0.0500 (0.0500)  time: 2.2511  data: 1.8991  max mem: 27255
Epoch: [251]  [ 200/2502]  eta: 0:13:17  lr: 0.000295  min_lr: 0.000295  loss: 3.0469 (2.8954)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4417 (1.4853)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [251]  [ 400/2502]  eta: 0:11:56  lr: 0.000294  min_lr: 0.000294  loss: 2.9148 (2.8972)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4602 (1.4852)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [251]  [ 600/2502]  eta: 0:10:45  lr: 0.000293  min_lr: 0.000293  loss: 3.1531 (2.8971)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3976 (1.4838)  time: 0.3359  data: 0.0005  max mem: 27255
Epoch: [251]  [ 800/2502]  eta: 0:09:35  lr: 0.000292  min_lr: 0.000292  loss: 2.9306 (2.8927)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5221 (1.4893)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [251]  [1000/2502]  eta: 0:08:26  lr: 0.000291  min_lr: 0.000291  loss: 3.0090 (2.8906)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3851 (1.4932)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [251]  [1200/2502]  eta: 0:07:18  lr: 0.000290  min_lr: 0.000290  loss: 3.0803 (2.8947)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5205 (1.4913)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [251]  [1400/2502]  eta: 0:06:11  lr: 0.000289  min_lr: 0.000289  loss: 2.8682 (2.8915)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5235 (1.4961)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [251]  [1600/2502]  eta: 0:05:03  lr: 0.000288  min_lr: 0.000288  loss: 3.0274 (2.8947)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5227 (1.5082)  time: 0.3420  data: 0.0004  max mem: 27255
Epoch: [251]  [1800/2502]  eta: 0:03:56  lr: 0.000287  min_lr: 0.000287  loss: 3.1887 (2.8979)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4453 (1.5120)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [251]  [2000/2502]  eta: 0:02:48  lr: 0.000286  min_lr: 0.000286  loss: 3.0459 (2.8988)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5504 (1.5145)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [251]  [2200/2502]  eta: 0:01:41  lr: 0.000285  min_lr: 0.000285  loss: 2.9652 (2.8985)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5915 (1.5155)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [251]  [2400/2502]  eta: 0:00:34  lr: 0.000284  min_lr: 0.000284  loss: 2.9420 (2.8928)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4085 (1.5090)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [251]  [2501/2502]  eta: 0:00:00  lr: 0.000284  min_lr: 0.000284  loss: 3.1513 (2.8925)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5647 (1.5105)  time: 0.3006  data: 0.0008  max mem: 27255
Epoch: [251] Total time: 0:13:59 (0.3356 s / it)
Averaged stats: lr: 0.000284  min_lr: 0.000284  loss: 3.1513 (2.8840)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5647 (1.5105)
Test:  [ 0/50]  eta: 0:02:35  loss: 0.5955 (0.5955)  acc1: 93.6000 (93.6000)  acc5: 98.8000 (98.8000)  time: 3.1048  data: 2.9101  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8045 (0.8190)  acc1: 87.6000 (87.5636)  acc5: 98.4000 (98.0000)  time: 0.4333  data: 0.2665  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8515 (0.8426)  acc1: 85.2000 (86.2667)  acc5: 98.0000 (97.9048)  time: 0.1701  data: 0.0061  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9785 (0.9263)  acc1: 82.4000 (84.6323)  acc5: 95.6000 (96.7097)  time: 0.1714  data: 0.0074  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1289 (0.9737)  acc1: 79.6000 (83.1805)  acc5: 94.4000 (96.2049)  time: 0.1946  data: 0.0307  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0692 (0.9909)  acc1: 78.8000 (82.6240)  acc5: 95.2000 (96.2240)  time: 0.1924  data: 0.0286  max mem: 27255
Test: Total time: 0:00:12 (0.2409 s / it)
* Acc@1 82.884 Acc@5 96.402 loss 0.983
Accuracy of the model on the 50000 test images: 82.9%
Max accuracy: 82.88%
Epoch: [252]  [   0/2502]  eta: 1:29:31  lr: 0.000284  min_lr: 0.000284  loss: 2.3190 (2.3190)  weight_decay: 0.0500 (0.0500)  time: 2.1471  data: 1.8092  max mem: 27255
Epoch: [252]  [ 200/2502]  eta: 0:13:09  lr: 0.000283  min_lr: 0.000283  loss: 2.8549 (2.9105)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4149 (1.5091)  time: 0.3350  data: 0.0003  max mem: 27255
Epoch: [252]  [ 400/2502]  eta: 0:11:52  lr: 0.000282  min_lr: 0.000282  loss: 2.6202 (2.8999)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4535 (1.5075)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [252]  [ 600/2502]  eta: 0:10:41  lr: 0.000281  min_lr: 0.000281  loss: 3.1428 (2.8955)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5757 (1.5093)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [252]  [ 800/2502]  eta: 0:09:32  lr: 0.000280  min_lr: 0.000280  loss: 3.0895 (2.8912)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4195 (1.5002)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [252]  [1000/2502]  eta: 0:08:24  lr: 0.000279  min_lr: 0.000279  loss: 2.9832 (2.8938)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6135 (1.5188)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [252]  [1200/2502]  eta: 0:07:17  lr: 0.000279  min_lr: 0.000279  loss: 3.0384 (2.8934)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4962 (1.5170)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [252]  [1400/2502]  eta: 0:06:10  lr: 0.000278  min_lr: 0.000278  loss: 3.0040 (2.8936)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4368 (1.5244)  time: 0.3413  data: 0.0005  max mem: 27255
Epoch: [252]  [1600/2502]  eta: 0:05:02  lr: 0.000277  min_lr: 0.000277  loss: 2.9605 (2.8931)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4392 (1.5203)  time: 0.3411  data: 0.0004  max mem: 27255
Epoch: [252]  [1800/2502]  eta: 0:03:55  lr: 0.000276  min_lr: 0.000276  loss: 2.7682 (2.8901)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4237 (1.5148)  time: 0.3360  data: 0.0005  max mem: 27255
Epoch: [252]  [2000/2502]  eta: 0:02:48  lr: 0.000275  min_lr: 0.000275  loss: 3.0785 (2.8940)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4422 (1.5096)  time: 0.3361  data: 0.0004  max mem: 27255
Epoch: [252]  [2200/2502]  eta: 0:01:41  lr: 0.000274  min_lr: 0.000274  loss: 3.0415 (2.8907)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6153 (1.5256)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [252]  [2400/2502]  eta: 0:00:34  lr: 0.000273  min_lr: 0.000273  loss: 2.5106 (2.8900)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4946 (1.5266)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [252]  [2501/2502]  eta: 0:00:00  lr: 0.000273  min_lr: 0.000273  loss: 2.7758 (2.8870)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5183 (1.5275)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [252] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000273  min_lr: 0.000273  loss: 2.7758 (2.8709)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5183 (1.5275)
Test:  [ 0/50]  eta: 0:02:35  loss: 0.4480 (0.4480)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 3.1188  data: 2.9397  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7453 (0.7275)  acc1: 87.6000 (87.2727)  acc5: 98.8000 (98.0727)  time: 0.4848  data: 0.3193  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7481 (0.7497)  acc1: 84.4000 (86.3810)  acc5: 98.0000 (97.9429)  time: 0.2131  data: 0.0465  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9133 (0.8385)  acc1: 82.8000 (84.6323)  acc5: 96.4000 (96.8258)  time: 0.2305  data: 0.0637  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0209 (0.8881)  acc1: 78.4000 (83.2195)  acc5: 94.4000 (96.3805)  time: 0.2249  data: 0.0607  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0209 (0.9095)  acc1: 78.0000 (82.6080)  acc5: 95.6000 (96.3200)  time: 0.1788  data: 0.0150  max mem: 27255
Test: Total time: 0:00:13 (0.2693 s / it)
* Acc@1 82.834 Acc@5 96.464 loss 0.902
Accuracy of the model on the 50000 test images: 82.8%
Max accuracy: 82.88%
Epoch: [253]  [   0/2502]  eta: 1:28:50  lr: 0.000273  min_lr: 0.000273  loss: 2.7418 (2.7418)  weight_decay: 0.0500 (0.0500)  time: 2.1306  data: 1.5547  max mem: 27255
Epoch: [253]  [ 200/2502]  eta: 0:13:10  lr: 0.000272  min_lr: 0.000272  loss: 3.0069 (2.8968)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5425 (1.6157)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [253]  [ 400/2502]  eta: 0:11:53  lr: 0.000271  min_lr: 0.000271  loss: 2.8619 (2.8889)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4104 (1.5625)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [253]  [ 600/2502]  eta: 0:10:43  lr: 0.000270  min_lr: 0.000270  loss: 2.9812 (2.8937)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5137 (1.5478)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [253]  [ 800/2502]  eta: 0:09:33  lr: 0.000269  min_lr: 0.000269  loss: 2.9519 (2.8987)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5794 (1.5588)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [253]  [1000/2502]  eta: 0:08:25  lr: 0.000268  min_lr: 0.000268  loss: 3.0537 (2.9029)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4772 (1.5404)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [253]  [1200/2502]  eta: 0:07:17  lr: 0.000267  min_lr: 0.000267  loss: 2.7887 (2.9008)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4170 (1.5383)  time: 0.3353  data: 0.0004  max mem: 27255
Epoch: [253]  [1400/2502]  eta: 0:06:10  lr: 0.000266  min_lr: 0.000266  loss: 2.9888 (2.9037)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5471 (1.5444)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [253]  [1600/2502]  eta: 0:05:02  lr: 0.000265  min_lr: 0.000265  loss: 2.7336 (2.9011)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5143 (1.5475)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [253]  [1800/2502]  eta: 0:03:55  lr: 0.000265  min_lr: 0.000265  loss: 3.1138 (2.8961)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4965 (1.5457)  time: 0.3359  data: 0.0004  max mem: 27255
Epoch: [253]  [2000/2502]  eta: 0:02:48  lr: 0.000264  min_lr: 0.000264  loss: 2.9490 (2.8957)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5055 (1.5476)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [253]  [2200/2502]  eta: 0:01:41  lr: 0.000263  min_lr: 0.000263  loss: 2.7950 (2.8887)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4682 (1.5490)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [253]  [2400/2502]  eta: 0:00:34  lr: 0.000262  min_lr: 0.000262  loss: 2.9498 (2.8905)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5200 (1.5465)  time: 0.3364  data: 0.0004  max mem: 27255
Epoch: [253]  [2501/2502]  eta: 0:00:00  lr: 0.000261  min_lr: 0.000261  loss: 2.8609 (2.8908)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4707 (1.5474)  time: 0.2998  data: 0.0009  max mem: 27255
Epoch: [253] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000261  min_lr: 0.000261  loss: 2.8609 (2.8834)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4707 (1.5474)
Test:  [ 0/50]  eta: 0:02:27  loss: 0.5154 (0.5154)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 2.9550  data: 2.7603  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7929 (0.7738)  acc1: 88.0000 (87.9636)  acc5: 98.8000 (98.2182)  time: 0.4807  data: 0.3140  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.7937 (0.8090)  acc1: 85.6000 (86.7810)  acc5: 98.0000 (98.1143)  time: 0.2474  data: 0.0836  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9968 (0.8991)  acc1: 82.0000 (84.8258)  acc5: 96.4000 (96.8645)  time: 0.2334  data: 0.0696  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0708 (0.9487)  acc1: 80.4000 (83.4244)  acc5: 94.4000 (96.4390)  time: 0.2030  data: 0.0394  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0683 (0.9679)  acc1: 79.2000 (82.7280)  acc5: 95.6000 (96.3520)  time: 0.1822  data: 0.0187  max mem: 27255
Test: Total time: 0:00:13 (0.2707 s / it)
* Acc@1 82.914 Acc@5 96.430 loss 0.959
Accuracy of the model on the 50000 test images: 82.9%
Max accuracy: 82.91%
Epoch: [254]  [   0/2502]  eta: 1:15:59  lr: 0.000261  min_lr: 0.000261  loss: 2.3727 (2.3727)  weight_decay: 0.0500 (0.0500)  time: 1.8223  data: 1.4777  max mem: 27255
Epoch: [254]  [ 200/2502]  eta: 0:13:07  lr: 0.000261  min_lr: 0.000261  loss: 3.0192 (2.8215)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6427 (1.6242)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [254]  [ 400/2502]  eta: 0:11:50  lr: 0.000260  min_lr: 0.000260  loss: 3.0642 (2.8457)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4937 (1.5765)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [254]  [ 600/2502]  eta: 0:10:40  lr: 0.000259  min_lr: 0.000259  loss: 2.9198 (2.8550)  weight_decay: 0.0500 (0.0500)  grad_norm: nan (nan)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [254]  [ 800/2502]  eta: 0:09:31  lr: 0.000258  min_lr: 0.000258  loss: 2.8981 (2.8557)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4499 (nan)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [254]  [1000/2502]  eta: 0:08:23  lr: 0.000257  min_lr: 0.000257  loss: 2.8967 (2.8539)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5913 (nan)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [254]  [1200/2502]  eta: 0:07:16  lr: 0.000256  min_lr: 0.000256  loss: 3.0517 (2.8532)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5800 (nan)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [254]  [1400/2502]  eta: 0:06:09  lr: 0.000255  min_lr: 0.000255  loss: 2.8820 (2.8593)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4473 (nan)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [254]  [1600/2502]  eta: 0:05:02  lr: 0.000254  min_lr: 0.000254  loss: 2.8044 (2.8560)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5465 (nan)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [254]  [1800/2502]  eta: 0:03:55  lr: 0.000254  min_lr: 0.000254  loss: 2.8616 (2.8555)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5787 (nan)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [254]  [2000/2502]  eta: 0:02:48  lr: 0.000253  min_lr: 0.000253  loss: 2.7532 (2.8520)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4959 (nan)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [254]  [2200/2502]  eta: 0:01:41  lr: 0.000252  min_lr: 0.000252  loss: 2.9678 (2.8535)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5178 (nan)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [254]  [2400/2502]  eta: 0:00:34  lr: 0.000251  min_lr: 0.000251  loss: 2.9313 (2.8541)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4810 (nan)  time: 0.3362  data: 0.0004  max mem: 27255
Epoch: [254]  [2501/2502]  eta: 0:00:00  lr: 0.000251  min_lr: 0.000251  loss: 2.8451 (2.8530)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5615 (nan)  time: 0.3003  data: 0.0008  max mem: 27255
Epoch: [254] Total time: 0:13:57 (0.3346 s / it)
Averaged stats: lr: 0.000251  min_lr: 0.000251  loss: 2.8451 (2.8644)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5615 (nan)
Test:  [ 0/50]  eta: 0:02:27  loss: 0.5071 (0.5071)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 2.9496  data: 2.7457  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8024 (0.7949)  acc1: 88.4000 (88.0000)  acc5: 98.8000 (98.1455)  time: 0.4329  data: 0.2655  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8044 (0.8131)  acc1: 86.0000 (86.8762)  acc5: 98.0000 (98.0571)  time: 0.1867  data: 0.0224  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9586 (0.9001)  acc1: 82.8000 (84.8645)  acc5: 96.8000 (96.9032)  time: 0.1972  data: 0.0327  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0602 (0.9482)  acc1: 80.0000 (83.4537)  acc5: 94.8000 (96.4878)  time: 0.2002  data: 0.0362  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0602 (0.9695)  acc1: 79.6000 (82.8480)  acc5: 95.2000 (96.3680)  time: 0.1816  data: 0.0173  max mem: 27255
Test: Total time: 0:00:12 (0.2453 s / it)
* Acc@1 83.122 Acc@5 96.504 loss 0.961
Accuracy of the model on the 50000 test images: 83.1%
Max accuracy: 83.12%
Epoch: [255]  [   0/2502]  eta: 1:29:34  lr: 0.000250  min_lr: 0.000250  loss: 3.2303 (3.2303)  weight_decay: 0.0500 (0.0500)  time: 2.1479  data: 1.7927  max mem: 27255
Epoch: [255]  [ 200/2502]  eta: 0:13:08  lr: 0.000250  min_lr: 0.000250  loss: 3.0416 (2.8650)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5467 (1.5519)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [255]  [ 400/2502]  eta: 0:11:51  lr: 0.000249  min_lr: 0.000249  loss: 3.0151 (2.8797)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4450 (1.5306)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [255]  [ 600/2502]  eta: 0:10:41  lr: 0.000248  min_lr: 0.000248  loss: 2.7459 (2.8592)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4731 (1.5198)  time: 0.3422  data: 0.0004  max mem: 27255
Epoch: [255]  [ 800/2502]  eta: 0:09:32  lr: 0.000247  min_lr: 0.000247  loss: 2.9859 (2.8586)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4746 (1.5330)  time: 0.3326  data: 0.0004  max mem: 27255
Epoch: [255]  [1000/2502]  eta: 0:08:25  lr: 0.000246  min_lr: 0.000246  loss: 2.9170 (2.8503)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5147 (1.5343)  time: 0.3428  data: 0.0005  max mem: 27255
Epoch: [255]  [1200/2502]  eta: 0:07:17  lr: 0.000245  min_lr: 0.000245  loss: 2.9058 (2.8529)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6229 (1.5407)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [255]  [1400/2502]  eta: 0:06:10  lr: 0.000244  min_lr: 0.000244  loss: 3.0884 (2.8637)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4968 (1.5417)  time: 0.3426  data: 0.0005  max mem: 27255
Epoch: [255]  [1600/2502]  eta: 0:05:02  lr: 0.000244  min_lr: 0.000244  loss: 2.9122 (2.8646)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4586 (1.5470)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [255]  [1800/2502]  eta: 0:03:55  lr: 0.000243  min_lr: 0.000243  loss: 2.8778 (2.8674)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4469 (1.5458)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [255]  [2000/2502]  eta: 0:02:48  lr: 0.000242  min_lr: 0.000242  loss: 2.9762 (2.8649)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5804 (1.5498)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [255]  [2200/2502]  eta: 0:01:41  lr: 0.000241  min_lr: 0.000241  loss: 3.1165 (2.8627)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4503 (1.5480)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [255]  [2400/2502]  eta: 0:00:34  lr: 0.000240  min_lr: 0.000240  loss: 2.7902 (2.8615)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6317 (1.5496)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [255]  [2501/2502]  eta: 0:00:00  lr: 0.000240  min_lr: 0.000240  loss: 2.9993 (2.8620)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3945 (1.5447)  time: 0.3000  data: 0.0009  max mem: 27255
Epoch: [255] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.000240  min_lr: 0.000240  loss: 2.9993 (2.8631)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3945 (1.5447)
Test:  [ 0/50]  eta: 0:02:11  loss: 0.5063 (0.5063)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 2.6390  data: 2.4450  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7763 (0.7555)  acc1: 89.2000 (87.7455)  acc5: 98.8000 (98.0727)  time: 0.4278  data: 0.2612  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7809 (0.7836)  acc1: 85.2000 (86.7619)  acc5: 98.0000 (97.9619)  time: 0.2260  data: 0.0621  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9458 (0.8775)  acc1: 82.4000 (84.7613)  acc5: 96.0000 (96.7355)  time: 0.2388  data: 0.0749  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0861 (0.9292)  acc1: 79.2000 (83.3951)  acc5: 94.4000 (96.3902)  time: 0.1986  data: 0.0348  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0861 (0.9534)  acc1: 78.0000 (82.6640)  acc5: 95.6000 (96.3600)  time: 0.1644  data: 0.0007  max mem: 27255
Test: Total time: 0:00:12 (0.2538 s / it)
* Acc@1 82.826 Acc@5 96.490 loss 0.947
Accuracy of the model on the 50000 test images: 82.8%
Max accuracy: 83.12%
Epoch: [256]  [   0/2502]  eta: 1:21:54  lr: 0.000240  min_lr: 0.000240  loss: 2.9652 (2.9652)  weight_decay: 0.0500 (0.0500)  time: 1.9644  data: 1.6259  max mem: 27255
Epoch: [256]  [ 200/2502]  eta: 0:13:07  lr: 0.000239  min_lr: 0.000239  loss: 2.8617 (2.8766)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3665 (1.5622)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [256]  [ 400/2502]  eta: 0:11:50  lr: 0.000238  min_lr: 0.000238  loss: 2.9100 (2.8951)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5163 (1.5966)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [256]  [ 600/2502]  eta: 0:10:39  lr: 0.000237  min_lr: 0.000237  loss: 3.0224 (2.8611)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6763 (1.5951)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [256]  [ 800/2502]  eta: 0:09:31  lr: 0.000236  min_lr: 0.000236  loss: 2.9771 (2.8543)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4632 (1.5903)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [256]  [1000/2502]  eta: 0:08:24  lr: 0.000236  min_lr: 0.000236  loss: 2.9974 (2.8532)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.3809 (1.5848)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [256]  [1200/2502]  eta: 0:07:16  lr: 0.000235  min_lr: 0.000235  loss: 2.9698 (2.8562)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5043 (1.5799)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [256]  [1400/2502]  eta: 0:06:09  lr: 0.000234  min_lr: 0.000234  loss: 2.9040 (2.8552)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4849 (1.5707)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [256]  [1600/2502]  eta: 0:05:02  lr: 0.000233  min_lr: 0.000233  loss: 2.7082 (2.8521)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4364 (1.5569)  time: 0.3330  data: 0.0003  max mem: 27255
Epoch: [256]  [1800/2502]  eta: 0:03:55  lr: 0.000232  min_lr: 0.000232  loss: 3.0556 (2.8578)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6288 (1.5657)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [256]  [2000/2502]  eta: 0:02:48  lr: 0.000231  min_lr: 0.000231  loss: 2.8879 (2.8586)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4347 (1.5601)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [256]  [2200/2502]  eta: 0:01:41  lr: 0.000230  min_lr: 0.000230  loss: 2.8431 (2.8571)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5965 (1.5658)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [256]  [2400/2502]  eta: 0:00:34  lr: 0.000230  min_lr: 0.000230  loss: 3.1688 (2.8569)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4655 (1.5613)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [256]  [2501/2502]  eta: 0:00:00  lr: 0.000229  min_lr: 0.000229  loss: 2.8912 (2.8595)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4512 (1.5621)  time: 0.3005  data: 0.0007  max mem: 27255
Epoch: [256] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.000229  min_lr: 0.000229  loss: 2.8912 (2.8572)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4512 (1.5621)
Test:  [ 0/50]  eta: 0:02:40  loss: 0.5571 (0.5571)  acc1: 94.0000 (94.0000)  acc5: 98.8000 (98.8000)  time: 3.2074  data: 3.0053  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7870 (0.8012)  acc1: 89.6000 (87.7091)  acc5: 98.8000 (98.2182)  time: 0.4487  data: 0.2808  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8022 (0.8264)  acc1: 85.6000 (86.8952)  acc5: 98.0000 (98.0762)  time: 0.1686  data: 0.0044  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9577 (0.9141)  acc1: 83.6000 (85.1226)  acc5: 96.4000 (96.9419)  time: 0.1721  data: 0.0081  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1090 (0.9660)  acc1: 78.8000 (83.6000)  acc5: 94.8000 (96.4488)  time: 0.1835  data: 0.0195  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1208 (0.9852)  acc1: 78.4000 (82.8800)  acc5: 95.6000 (96.4240)  time: 0.2036  data: 0.0399  max mem: 27255
Test: Total time: 0:00:12 (0.2450 s / it)
* Acc@1 82.984 Acc@5 96.490 loss 0.981
Accuracy of the model on the 50000 test images: 83.0%
Max accuracy: 83.12%
Epoch: [257]  [   0/2502]  eta: 1:28:06  lr: 0.000229  min_lr: 0.000229  loss: 2.8893 (2.8893)  weight_decay: 0.0500 (0.0500)  time: 2.1128  data: 1.5952  max mem: 27255
Epoch: [257]  [ 200/2502]  eta: 0:13:09  lr: 0.000228  min_lr: 0.000228  loss: 2.9335 (2.8084)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4230 (1.5379)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [257]  [ 400/2502]  eta: 0:11:51  lr: 0.000228  min_lr: 0.000228  loss: 2.8519 (2.8224)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4864 (1.5506)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [257]  [ 600/2502]  eta: 0:10:40  lr: 0.000227  min_lr: 0.000227  loss: 2.8540 (2.8213)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5412 (1.5739)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [257]  [ 800/2502]  eta: 0:09:32  lr: 0.000226  min_lr: 0.000226  loss: 2.9294 (2.8309)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5138 (1.5749)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [257]  [1000/2502]  eta: 0:08:24  lr: 0.000225  min_lr: 0.000225  loss: 2.8617 (2.8351)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5098 (1.5786)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [257]  [1200/2502]  eta: 0:07:17  lr: 0.000224  min_lr: 0.000224  loss: 3.0045 (2.8303)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5839 (1.5777)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [257]  [1400/2502]  eta: 0:06:09  lr: 0.000223  min_lr: 0.000223  loss: 3.0303 (2.8357)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4454 (1.5740)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [257]  [1600/2502]  eta: 0:05:02  lr: 0.000223  min_lr: 0.000223  loss: 2.9256 (2.8358)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5160 (1.5828)  time: 0.3418  data: 0.0003  max mem: 27255
Epoch: [257]  [1800/2502]  eta: 0:03:55  lr: 0.000222  min_lr: 0.000222  loss: 2.9498 (2.8399)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4589 (1.5752)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [257]  [2000/2502]  eta: 0:02:48  lr: 0.000221  min_lr: 0.000221  loss: 2.8178 (2.8370)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6648 (1.5759)  time: 0.3356  data: 0.0005  max mem: 27255
Epoch: [257]  [2200/2502]  eta: 0:01:41  lr: 0.000220  min_lr: 0.000220  loss: 2.8987 (2.8423)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6436 (1.5808)  time: 0.3362  data: 0.0004  max mem: 27255
Epoch: [257]  [2400/2502]  eta: 0:00:34  lr: 0.000219  min_lr: 0.000219  loss: 3.0655 (2.8409)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5725 (1.5823)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [257]  [2501/2502]  eta: 0:00:00  lr: 0.000219  min_lr: 0.000219  loss: 3.0336 (2.8410)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5119 (1.5823)  time: 0.3002  data: 0.0007  max mem: 27255
Epoch: [257] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.000219  min_lr: 0.000219  loss: 3.0336 (2.8476)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5119 (1.5823)
Test:  [ 0/50]  eta: 0:02:33  loss: 0.5122 (0.5122)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.0629  data: 2.8619  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7857 (0.7829)  acc1: 88.4000 (88.0727)  acc5: 98.4000 (98.0364)  time: 0.4769  data: 0.3079  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8026 (0.8180)  acc1: 85.6000 (86.8762)  acc5: 98.0000 (98.0571)  time: 0.2022  data: 0.0374  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9548 (0.9106)  acc1: 82.8000 (84.7484)  acc5: 96.8000 (96.8774)  time: 0.1922  data: 0.0283  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1357 (0.9586)  acc1: 79.6000 (83.3073)  acc5: 94.4000 (96.4878)  time: 0.2113  data: 0.0468  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1357 (0.9775)  acc1: 78.4000 (82.7760)  acc5: 95.2000 (96.4320)  time: 0.2110  data: 0.0467  max mem: 27255
Test: Total time: 0:00:12 (0.2581 s / it)
* Acc@1 83.100 Acc@5 96.496 loss 0.971
Accuracy of the model on the 50000 test images: 83.1%
Max accuracy: 83.12%
Epoch: [258]  [   0/2502]  eta: 1:31:25  lr: 0.000219  min_lr: 0.000219  loss: 2.8589 (2.8589)  weight_decay: 0.0500 (0.0500)  time: 2.1925  data: 1.8494  max mem: 27255
Epoch: [258]  [ 200/2502]  eta: 0:13:11  lr: 0.000218  min_lr: 0.000218  loss: 3.0648 (2.7825)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5177 (1.6455)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [258]  [ 400/2502]  eta: 0:11:53  lr: 0.000217  min_lr: 0.000217  loss: 2.8614 (2.8141)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4873 (1.5970)  time: 0.3426  data: 0.0004  max mem: 27255
Epoch: [258]  [ 600/2502]  eta: 0:10:42  lr: 0.000216  min_lr: 0.000216  loss: 2.9016 (2.7975)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5351 (1.5787)  time: 0.3431  data: 0.0006  max mem: 27255
Epoch: [258]  [ 800/2502]  eta: 0:09:33  lr: 0.000216  min_lr: 0.000216  loss: 2.9193 (2.8054)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5352 (1.5863)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [258]  [1000/2502]  eta: 0:08:25  lr: 0.000215  min_lr: 0.000215  loss: 3.0360 (2.8110)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4584 (1.5712)  time: 0.3331  data: 0.0003  max mem: 27255
Epoch: [258]  [1200/2502]  eta: 0:07:17  lr: 0.000214  min_lr: 0.000214  loss: 2.9007 (2.8239)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5495 (1.5837)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [258]  [1400/2502]  eta: 0:06:09  lr: 0.000213  min_lr: 0.000213  loss: 2.9816 (2.8301)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5716 (1.5760)  time: 0.3367  data: 0.0003  max mem: 27255
Epoch: [258]  [1600/2502]  eta: 0:05:02  lr: 0.000212  min_lr: 0.000212  loss: 2.9351 (2.8350)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5813 (1.5776)  time: 0.3328  data: 0.0003  max mem: 27255
Epoch: [258]  [1800/2502]  eta: 0:03:55  lr: 0.000212  min_lr: 0.000212  loss: 2.9334 (2.8295)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5788 (1.5825)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [258]  [2000/2502]  eta: 0:02:48  lr: 0.000211  min_lr: 0.000211  loss: 2.9251 (2.8280)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4095 (1.5720)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [258]  [2200/2502]  eta: 0:01:41  lr: 0.000210  min_lr: 0.000210  loss: 2.8108 (2.8271)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5532 (1.5774)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [258]  [2400/2502]  eta: 0:00:34  lr: 0.000209  min_lr: 0.000209  loss: 3.0551 (2.8281)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5716 (1.5816)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [258]  [2501/2502]  eta: 0:00:00  lr: 0.000209  min_lr: 0.000209  loss: 3.1523 (2.8293)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5792 (1.5849)  time: 0.3003  data: 0.0007  max mem: 27255
Epoch: [258] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.000209  min_lr: 0.000209  loss: 3.1523 (2.8469)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5792 (1.5849)
Test:  [ 0/50]  eta: 0:02:26  loss: 0.4948 (0.4948)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.9246  data: 2.7153  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7348 (0.7444)  acc1: 89.2000 (87.6364)  acc5: 98.4000 (97.9636)  time: 0.4426  data: 0.2742  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7676 (0.7716)  acc1: 84.8000 (86.7429)  acc5: 98.4000 (98.0952)  time: 0.2190  data: 0.0549  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9301 (0.8619)  acc1: 82.8000 (84.7226)  acc5: 96.8000 (97.0065)  time: 0.2526  data: 0.0886  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0688 (0.9093)  acc1: 79.2000 (83.4049)  acc5: 94.8000 (96.5659)  time: 0.2145  data: 0.0507  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0688 (0.9294)  acc1: 78.4000 (82.6960)  acc5: 95.2000 (96.4800)  time: 0.1657  data: 0.0020  max mem: 27255
Test: Total time: 0:00:13 (0.2636 s / it)
* Acc@1 82.974 Acc@5 96.478 loss 0.924
Accuracy of the model on the 50000 test images: 83.0%
Max accuracy: 83.12%
Epoch: [259]  [   0/2502]  eta: 1:30:42  lr: 0.000209  min_lr: 0.000209  loss: 3.3073 (3.3073)  weight_decay: 0.0500 (0.0500)  time: 2.1752  data: 1.3467  max mem: 27255
Epoch: [259]  [ 200/2502]  eta: 0:13:10  lr: 0.000208  min_lr: 0.000208  loss: 2.8860 (2.8026)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5441 (1.6325)  time: 0.3352  data: 0.0005  max mem: 27255
Epoch: [259]  [ 400/2502]  eta: 0:11:52  lr: 0.000207  min_lr: 0.000207  loss: 3.0050 (2.7995)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4767 (1.5648)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [259]  [ 600/2502]  eta: 0:10:41  lr: 0.000206  min_lr: 0.000206  loss: 2.7445 (2.8068)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5920 (1.5736)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [259]  [ 800/2502]  eta: 0:09:32  lr: 0.000206  min_lr: 0.000206  loss: 2.8582 (2.8245)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4527 (1.5634)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [259]  [1000/2502]  eta: 0:08:24  lr: 0.000205  min_lr: 0.000205  loss: 2.8081 (2.8211)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5633 (1.5742)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [259]  [1200/2502]  eta: 0:07:17  lr: 0.000204  min_lr: 0.000204  loss: 2.8701 (2.8179)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5746 (1.5789)  time: 0.3332  data: 0.0003  max mem: 27255
Epoch: [259]  [1400/2502]  eta: 0:06:09  lr: 0.000203  min_lr: 0.000203  loss: 2.9492 (2.8285)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4922 (1.5758)  time: 0.3355  data: 0.0005  max mem: 27255
Epoch: [259]  [1600/2502]  eta: 0:05:02  lr: 0.000203  min_lr: 0.000203  loss: 2.8094 (2.8273)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5991 (1.5800)  time: 0.3335  data: 0.0003  max mem: 27255
Epoch: [259]  [1800/2502]  eta: 0:03:55  lr: 0.000202  min_lr: 0.000202  loss: 2.9166 (2.8279)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4508 (1.5707)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [259]  [2000/2502]  eta: 0:02:48  lr: 0.000201  min_lr: 0.000201  loss: 2.8175 (2.8259)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6860 (1.5803)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [259]  [2200/2502]  eta: 0:01:41  lr: 0.000200  min_lr: 0.000200  loss: 2.6907 (2.8280)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4601 (1.5778)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [259]  [2400/2502]  eta: 0:00:34  lr: 0.000199  min_lr: 0.000199  loss: 2.8679 (2.8322)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5888 (1.5825)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [259]  [2501/2502]  eta: 0:00:00  lr: 0.000199  min_lr: 0.000199  loss: 2.9333 (2.8320)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5621 (1.5814)  time: 0.3005  data: 0.0009  max mem: 27255
Epoch: [259] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000199  min_lr: 0.000199  loss: 2.9333 (2.8381)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5621 (1.5814)
Test:  [ 0/50]  eta: 0:02:58  loss: 0.4739 (0.4739)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.5799  data: 3.3815  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7418 (0.7419)  acc1: 88.4000 (88.0364)  acc5: 98.8000 (98.0000)  time: 0.4760  data: 0.3090  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7496 (0.7729)  acc1: 85.2000 (87.0286)  acc5: 98.0000 (97.9810)  time: 0.1705  data: 0.0065  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9312 (0.8609)  acc1: 83.2000 (84.9677)  acc5: 96.4000 (96.8645)  time: 0.2020  data: 0.0378  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0077 (0.9075)  acc1: 80.0000 (83.6000)  acc5: 94.4000 (96.4976)  time: 0.2048  data: 0.0407  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0065 (0.9297)  acc1: 79.6000 (82.8800)  acc5: 95.6000 (96.4160)  time: 0.2043  data: 0.0405  max mem: 27255
Test: Total time: 0:00:12 (0.2533 s / it)
* Acc@1 83.096 Acc@5 96.546 loss 0.921
Accuracy of the model on the 50000 test images: 83.1%
Max accuracy: 83.12%
Epoch: [260]  [   0/2502]  eta: 1:32:27  lr: 0.000199  min_lr: 0.000199  loss: 2.5138 (2.5138)  weight_decay: 0.0500 (0.0500)  time: 2.2172  data: 1.3103  max mem: 27255
Epoch: [260]  [ 200/2502]  eta: 0:13:11  lr: 0.000198  min_lr: 0.000198  loss: 2.9935 (2.8055)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5478 (1.5889)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [260]  [ 400/2502]  eta: 0:11:52  lr: 0.000197  min_lr: 0.000197  loss: 3.0437 (2.8327)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4842 (1.5817)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [260]  [ 600/2502]  eta: 0:10:41  lr: 0.000197  min_lr: 0.000197  loss: 2.8422 (2.8135)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5963 (1.5852)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [260]  [ 800/2502]  eta: 0:09:32  lr: 0.000196  min_lr: 0.000196  loss: 2.9139 (2.7972)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4882 (1.5790)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [260]  [1000/2502]  eta: 0:08:24  lr: 0.000195  min_lr: 0.000195  loss: 2.9254 (2.8017)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4863 (1.5818)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [260]  [1200/2502]  eta: 0:07:17  lr: 0.000194  min_lr: 0.000194  loss: 2.7627 (2.8008)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5714 (1.5831)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [260]  [1400/2502]  eta: 0:06:09  lr: 0.000194  min_lr: 0.000194  loss: 2.8476 (2.8074)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5707 (1.5910)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [260]  [1600/2502]  eta: 0:05:02  lr: 0.000193  min_lr: 0.000193  loss: 2.6864 (2.8114)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6081 (1.5911)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [260]  [1800/2502]  eta: 0:03:55  lr: 0.000192  min_lr: 0.000192  loss: 3.0471 (2.8129)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5820 (1.6079)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [260]  [2000/2502]  eta: 0:02:48  lr: 0.000191  min_lr: 0.000191  loss: 2.9677 (2.8199)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6119 (1.6082)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [260]  [2200/2502]  eta: 0:01:41  lr: 0.000191  min_lr: 0.000191  loss: 2.9246 (2.8229)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6785 (inf)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [260]  [2400/2502]  eta: 0:00:34  lr: 0.000190  min_lr: 0.000190  loss: 2.9214 (2.8270)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5956 (inf)  time: 0.3325  data: 0.0003  max mem: 27255
Epoch: [260]  [2501/2502]  eta: 0:00:00  lr: 0.000189  min_lr: 0.000189  loss: 3.0545 (2.8286)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6006 (inf)  time: 0.3005  data: 0.0006  max mem: 27255
Epoch: [260] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.000189  min_lr: 0.000189  loss: 3.0545 (2.8427)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6006 (inf)
Test:  [ 0/50]  eta: 0:02:12  loss: 0.5349 (0.5349)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 2.6470  data: 2.4439  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7874 (0.7803)  acc1: 88.4000 (88.0364)  acc5: 98.8000 (98.2546)  time: 0.4756  data: 0.3080  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8042 (0.8052)  acc1: 85.2000 (87.0667)  acc5: 98.0000 (98.1143)  time: 0.2475  data: 0.0835  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9473 (0.8919)  acc1: 83.6000 (85.1871)  acc5: 96.4000 (97.0452)  time: 0.2144  data: 0.0505  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0794 (0.9399)  acc1: 80.0000 (83.7854)  acc5: 94.8000 (96.6342)  time: 0.1921  data: 0.0277  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0794 (0.9596)  acc1: 79.2000 (83.0480)  acc5: 95.2000 (96.5120)  time: 0.1778  data: 0.0136  max mem: 27255
Test: Total time: 0:00:13 (0.2603 s / it)
* Acc@1 83.224 Acc@5 96.560 loss 0.954
Accuracy of the model on the 50000 test images: 83.2%
Max accuracy: 83.22%
Epoch: [261]  [   0/2502]  eta: 1:10:48  lr: 0.000189  min_lr: 0.000189  loss: 3.3270 (3.3270)  weight_decay: 0.0500 (0.0500)  time: 1.6982  data: 1.3564  max mem: 27255
Epoch: [261]  [ 200/2502]  eta: 0:13:06  lr: 0.000189  min_lr: 0.000189  loss: 2.8186 (2.8024)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4426 (1.5278)  time: 0.3363  data: 0.0005  max mem: 27255
Epoch: [261]  [ 400/2502]  eta: 0:11:53  lr: 0.000188  min_lr: 0.000188  loss: 3.0297 (2.8264)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6106 (1.5674)  time: 0.3368  data: 0.0004  max mem: 27255
Epoch: [261]  [ 600/2502]  eta: 0:10:43  lr: 0.000187  min_lr: 0.000187  loss: 3.0397 (2.8263)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5126 (1.5801)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [261]  [ 800/2502]  eta: 0:09:33  lr: 0.000186  min_lr: 0.000186  loss: 2.9077 (2.8305)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5484 (1.5871)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [261]  [1000/2502]  eta: 0:08:25  lr: 0.000186  min_lr: 0.000186  loss: 2.6947 (2.8334)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6005 (1.6265)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [261]  [1200/2502]  eta: 0:07:17  lr: 0.000185  min_lr: 0.000185  loss: 2.9078 (2.8318)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5310 (1.6158)  time: 0.3336  data: 0.0003  max mem: 27255
Epoch: [261]  [1400/2502]  eta: 0:06:10  lr: 0.000184  min_lr: 0.000184  loss: 2.9604 (2.8327)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6139 (1.6179)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [261]  [1600/2502]  eta: 0:05:02  lr: 0.000183  min_lr: 0.000183  loss: 3.0443 (2.8295)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5816 (1.6076)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [261]  [1800/2502]  eta: 0:03:55  lr: 0.000183  min_lr: 0.000183  loss: 2.9941 (2.8343)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5422 (1.6101)  time: 0.3356  data: 0.0005  max mem: 27255
Epoch: [261]  [2000/2502]  eta: 0:02:48  lr: 0.000182  min_lr: 0.000182  loss: 2.9088 (2.8334)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5374 (1.6098)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [261]  [2200/2502]  eta: 0:01:41  lr: 0.000181  min_lr: 0.000181  loss: 3.0168 (2.8344)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5265 (1.6075)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [261]  [2400/2502]  eta: 0:00:34  lr: 0.000180  min_lr: 0.000180  loss: 2.8626 (2.8352)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4716 (1.6046)  time: 0.3342  data: 0.0005  max mem: 27255
Epoch: [261]  [2501/2502]  eta: 0:00:00  lr: 0.000180  min_lr: 0.000180  loss: 3.0446 (2.8378)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4986 (1.6021)  time: 0.3000  data: 0.0009  max mem: 27255
Epoch: [261] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000180  min_lr: 0.000180  loss: 3.0446 (2.8263)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4986 (1.6021)
Test:  [ 0/50]  eta: 0:02:18  loss: 0.5568 (0.5568)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 2.7770  data: 2.5739  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8014 (0.8079)  acc1: 89.6000 (88.1091)  acc5: 98.4000 (97.8545)  time: 0.4390  data: 0.2715  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8202 (0.8374)  acc1: 86.4000 (87.1048)  acc5: 98.0000 (97.9238)  time: 0.2170  data: 0.0531  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9998 (0.9329)  acc1: 83.2000 (84.9677)  acc5: 97.2000 (97.0581)  time: 0.2494  data: 0.0851  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1484 (0.9842)  acc1: 79.6000 (83.6195)  acc5: 95.2000 (96.6244)  time: 0.2411  data: 0.0768  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1397 (1.0050)  acc1: 79.6000 (82.9040)  acc5: 95.6000 (96.5520)  time: 0.1880  data: 0.0243  max mem: 27255
Test: Total time: 0:00:13 (0.2699 s / it)
* Acc@1 83.210 Acc@5 96.556 loss 0.997
Accuracy of the model on the 50000 test images: 83.2%
Max accuracy: 83.22%
Epoch: [262]  [   0/2502]  eta: 1:19:52  lr: 0.000180  min_lr: 0.000180  loss: 2.6819 (2.6819)  weight_decay: 0.0500 (0.0500)  time: 1.9154  data: 1.4943  max mem: 27255
Epoch: [262]  [ 200/2502]  eta: 0:13:06  lr: 0.000179  min_lr: 0.000179  loss: 2.9456 (2.7894)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5105 (1.6946)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [262]  [ 400/2502]  eta: 0:11:49  lr: 0.000179  min_lr: 0.000179  loss: 2.9558 (2.8029)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6369 (1.6452)  time: 0.3325  data: 0.0003  max mem: 27255
Epoch: [262]  [ 600/2502]  eta: 0:10:39  lr: 0.000178  min_lr: 0.000178  loss: 3.0583 (2.8113)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5512 (1.6317)  time: 0.3330  data: 0.0003  max mem: 27255
Epoch: [262]  [ 800/2502]  eta: 0:09:30  lr: 0.000177  min_lr: 0.000177  loss: 2.8859 (2.8233)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4937 (1.6365)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [262]  [1000/2502]  eta: 0:08:23  lr: 0.000176  min_lr: 0.000176  loss: 2.5135 (2.8190)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5343 (1.6331)  time: 0.3416  data: 0.0003  max mem: 27255
Epoch: [262]  [1200/2502]  eta: 0:07:16  lr: 0.000176  min_lr: 0.000176  loss: 2.9967 (2.8143)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5849 (1.6385)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [262]  [1400/2502]  eta: 0:06:09  lr: 0.000175  min_lr: 0.000175  loss: 3.0342 (2.8103)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7148 (1.6375)  time: 0.3425  data: 0.0005  max mem: 27255
Epoch: [262]  [1600/2502]  eta: 0:05:02  lr: 0.000174  min_lr: 0.000174  loss: 3.0119 (2.8108)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6993 (1.6355)  time: 0.3340  data: 0.0003  max mem: 27255
Epoch: [262]  [1800/2502]  eta: 0:03:55  lr: 0.000173  min_lr: 0.000173  loss: 2.7456 (2.8110)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5568 (1.6329)  time: 0.3325  data: 0.0003  max mem: 27255
Epoch: [262]  [2000/2502]  eta: 0:02:48  lr: 0.000173  min_lr: 0.000173  loss: 2.9573 (2.8115)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5745 (1.6347)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [262]  [2200/2502]  eta: 0:01:41  lr: 0.000172  min_lr: 0.000172  loss: 2.9649 (2.8113)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5775 (1.6365)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [262]  [2400/2502]  eta: 0:00:34  lr: 0.000171  min_lr: 0.000171  loss: 2.7892 (2.8137)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5515 (1.6298)  time: 0.3325  data: 0.0003  max mem: 27255
Epoch: [262]  [2501/2502]  eta: 0:00:00  lr: 0.000171  min_lr: 0.000171  loss: 2.9153 (2.8134)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4898 (1.6249)  time: 0.2998  data: 0.0007  max mem: 27255
Epoch: [262] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.000171  min_lr: 0.000171  loss: 2.9153 (2.8215)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4898 (1.6249)
Test:  [ 0/50]  eta: 0:02:22  loss: 0.5731 (0.5731)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.8410  data: 2.6403  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8386 (0.8456)  acc1: 89.6000 (88.3636)  acc5: 98.8000 (98.0000)  time: 0.4079  data: 0.2405  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8484 (0.8666)  acc1: 86.0000 (87.3333)  acc5: 98.0000 (98.0381)  time: 0.2308  data: 0.0651  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0229 (0.9606)  acc1: 83.2000 (85.3032)  acc5: 96.4000 (97.0710)  time: 0.2855  data: 0.1188  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1649 (1.0157)  acc1: 79.6000 (83.7561)  acc5: 94.8000 (96.5854)  time: 0.2294  data: 0.0647  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1649 (1.0369)  acc1: 79.6000 (83.1360)  acc5: 95.6000 (96.5600)  time: 0.1744  data: 0.0108  max mem: 27255
Test: Total time: 0:00:13 (0.2731 s / it)
* Acc@1 83.284 Acc@5 96.600 loss 1.031
Accuracy of the model on the 50000 test images: 83.3%
Max accuracy: 83.28%
Epoch: [263]  [   0/2502]  eta: 1:25:05  lr: 0.000171  min_lr: 0.000171  loss: 3.2478 (3.2478)  weight_decay: 0.0500 (0.0500)  time: 2.0407  data: 1.6982  max mem: 27255
Epoch: [263]  [ 200/2502]  eta: 0:13:08  lr: 0.000170  min_lr: 0.000170  loss: 2.7120 (2.7855)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4838 (1.6035)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [263]  [ 400/2502]  eta: 0:11:51  lr: 0.000169  min_lr: 0.000169  loss: 3.0055 (2.8103)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6575 (1.6109)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [263]  [ 600/2502]  eta: 0:10:41  lr: 0.000169  min_lr: 0.000169  loss: 3.0619 (2.8264)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6800 (1.6275)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [263]  [ 800/2502]  eta: 0:09:33  lr: 0.000168  min_lr: 0.000168  loss: 2.7416 (2.8205)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4658 (1.6165)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [263]  [1000/2502]  eta: 0:08:25  lr: 0.000167  min_lr: 0.000167  loss: 2.9549 (2.8146)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4945 (1.6254)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [263]  [1200/2502]  eta: 0:07:17  lr: 0.000167  min_lr: 0.000167  loss: 2.7762 (2.8123)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6439 (1.6327)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [263]  [1400/2502]  eta: 0:06:10  lr: 0.000166  min_lr: 0.000166  loss: 2.8506 (2.8095)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4650 (1.6309)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [263]  [1600/2502]  eta: 0:05:02  lr: 0.000165  min_lr: 0.000165  loss: 2.7829 (2.8071)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5394 (1.6236)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [263]  [1800/2502]  eta: 0:03:55  lr: 0.000164  min_lr: 0.000164  loss: 2.9169 (2.8102)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5377 (1.6187)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [263]  [2000/2502]  eta: 0:02:48  lr: 0.000164  min_lr: 0.000164  loss: 2.9200 (2.8119)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5389 (1.6179)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [263]  [2200/2502]  eta: 0:01:41  lr: 0.000163  min_lr: 0.000163  loss: 2.7351 (2.8103)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5410 (1.6181)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [263]  [2400/2502]  eta: 0:00:34  lr: 0.000162  min_lr: 0.000162  loss: 2.7133 (2.8055)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6028 (1.6182)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [263]  [2501/2502]  eta: 0:00:00  lr: 0.000162  min_lr: 0.000162  loss: 2.9307 (2.8085)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6132 (1.6221)  time: 0.3043  data: 0.0008  max mem: 27255
Epoch: [263] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.000162  min_lr: 0.000162  loss: 2.9307 (2.8105)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6132 (1.6221)
Test:  [ 0/50]  eta: 0:02:37  loss: 0.5486 (0.5486)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.1404  data: 2.9417  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7865 (0.8027)  acc1: 89.6000 (87.8545)  acc5: 99.2000 (98.1091)  time: 0.4798  data: 0.3108  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.8220 (0.8318)  acc1: 84.4000 (86.8000)  acc5: 98.4000 (98.0762)  time: 0.2483  data: 0.0826  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0096 (0.9258)  acc1: 82.8000 (84.8129)  acc5: 97.2000 (96.9677)  time: 0.2637  data: 0.0990  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 1.1312 (0.9778)  acc1: 80.0000 (83.4634)  acc5: 94.4000 (96.5659)  time: 0.2114  data: 0.0475  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1312 (0.9986)  acc1: 78.8000 (82.7920)  acc5: 95.6000 (96.5040)  time: 0.1710  data: 0.0074  max mem: 27255
Test: Total time: 0:00:13 (0.2781 s / it)
* Acc@1 83.134 Acc@5 96.572 loss 0.992
Accuracy of the model on the 50000 test images: 83.1%
Max accuracy: 83.28%
Epoch: [264]  [   0/2502]  eta: 1:40:25  lr: 0.000162  min_lr: 0.000162  loss: 3.4174 (3.4174)  weight_decay: 0.0500 (0.0500)  time: 2.4082  data: 1.3060  max mem: 27255
Epoch: [264]  [ 200/2502]  eta: 0:13:15  lr: 0.000161  min_lr: 0.000161  loss: 2.8880 (2.8316)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5324 (1.6223)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [264]  [ 400/2502]  eta: 0:11:54  lr: 0.000160  min_lr: 0.000160  loss: 3.0157 (2.8248)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8157 (1.6497)  time: 0.3362  data: 0.0004  max mem: 27255
Epoch: [264]  [ 600/2502]  eta: 0:10:42  lr: 0.000160  min_lr: 0.000160  loss: 2.4665 (2.7925)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5061 (1.6423)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [264]  [ 800/2502]  eta: 0:09:33  lr: 0.000159  min_lr: 0.000159  loss: 2.8478 (2.8138)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5314 (1.6267)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [264]  [1000/2502]  eta: 0:08:25  lr: 0.000158  min_lr: 0.000158  loss: 3.0460 (2.8155)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6056 (1.6320)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [264]  [1200/2502]  eta: 0:07:17  lr: 0.000158  min_lr: 0.000158  loss: 2.8328 (2.8071)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5887 (1.6348)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [264]  [1400/2502]  eta: 0:06:10  lr: 0.000157  min_lr: 0.000157  loss: 2.7967 (2.8052)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5612 (1.6313)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [264]  [1600/2502]  eta: 0:05:02  lr: 0.000156  min_lr: 0.000156  loss: 2.9570 (2.8039)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4631 (1.6261)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [264]  [1800/2502]  eta: 0:03:55  lr: 0.000156  min_lr: 0.000156  loss: 2.8256 (2.7967)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6817 (inf)  time: 0.3331  data: 0.0005  max mem: 27255
Epoch: [264]  [2000/2502]  eta: 0:02:48  lr: 0.000155  min_lr: 0.000155  loss: 3.0055 (2.7989)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6062 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [264]  [2200/2502]  eta: 0:01:41  lr: 0.000154  min_lr: 0.000154  loss: 3.1387 (2.7966)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5981 (inf)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [264]  [2400/2502]  eta: 0:00:34  lr: 0.000154  min_lr: 0.000154  loss: 2.9582 (2.8014)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7094 (inf)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [264]  [2501/2502]  eta: 0:00:00  lr: 0.000153  min_lr: 0.000153  loss: 3.1617 (2.8013)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5382 (inf)  time: 0.2998  data: 0.0009  max mem: 27255
Epoch: [264] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.000153  min_lr: 0.000153  loss: 3.1617 (2.8076)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5382 (inf)
Test:  [ 0/50]  eta: 0:02:26  loss: 0.5366 (0.5366)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.9389  data: 2.7337  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7953 (0.8028)  acc1: 90.0000 (88.3636)  acc5: 98.8000 (98.1091)  time: 0.4775  data: 0.3100  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.8319 (0.8294)  acc1: 84.8000 (87.2191)  acc5: 98.4000 (98.0762)  time: 0.2510  data: 0.0872  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9708 (0.9224)  acc1: 83.2000 (85.1613)  acc5: 96.4000 (97.0710)  time: 0.2650  data: 0.1013  max mem: 27255
Test:  [40/50]  eta: 0:00:03  loss: 1.1519 (0.9747)  acc1: 79.6000 (83.7463)  acc5: 94.8000 (96.5463)  time: 0.2515  data: 0.0879  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1519 (0.9951)  acc1: 79.2000 (83.1280)  acc5: 95.6000 (96.5280)  time: 0.2036  data: 0.0402  max mem: 27255
Test: Total time: 0:00:14 (0.2915 s / it)
* Acc@1 83.242 Acc@5 96.554 loss 0.989
Accuracy of the model on the 50000 test images: 83.2%
Max accuracy: 83.28%
Epoch: [265]  [   0/2502]  eta: 1:28:58  lr: 0.000153  min_lr: 0.000153  loss: 3.2271 (3.2271)  weight_decay: 0.0500 (0.0500)  time: 2.1337  data: 1.4463  max mem: 27255
Epoch: [265]  [ 200/2502]  eta: 0:13:11  lr: 0.000153  min_lr: 0.000153  loss: 2.8200 (2.7133)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5624 (1.5795)  time: 0.3329  data: 0.0005  max mem: 27255
Epoch: [265]  [ 400/2502]  eta: 0:11:52  lr: 0.000152  min_lr: 0.000152  loss: 2.8814 (2.7327)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7294 (1.6701)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [265]  [ 600/2502]  eta: 0:10:41  lr: 0.000151  min_lr: 0.000151  loss: 3.0846 (2.7549)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6385 (1.6729)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [265]  [ 800/2502]  eta: 0:09:32  lr: 0.000150  min_lr: 0.000150  loss: 3.0284 (2.7669)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5083 (1.6489)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [265]  [1000/2502]  eta: 0:08:24  lr: 0.000150  min_lr: 0.000150  loss: 3.0340 (2.7726)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5742 (1.6509)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [265]  [1200/2502]  eta: 0:07:17  lr: 0.000149  min_lr: 0.000149  loss: 2.9037 (2.7906)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5581 (1.6452)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [265]  [1400/2502]  eta: 0:06:10  lr: 0.000148  min_lr: 0.000148  loss: 2.9707 (2.7920)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5654 (1.6464)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [265]  [1600/2502]  eta: 0:05:02  lr: 0.000148  min_lr: 0.000148  loss: 2.8165 (2.7892)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5406 (1.6508)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [265]  [1800/2502]  eta: 0:03:55  lr: 0.000147  min_lr: 0.000147  loss: 2.8637 (2.7889)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5634 (1.6456)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [265]  [2000/2502]  eta: 0:02:48  lr: 0.000146  min_lr: 0.000146  loss: 2.8265 (2.7910)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7855 (1.6582)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [265]  [2200/2502]  eta: 0:01:41  lr: 0.000146  min_lr: 0.000146  loss: 2.8060 (2.7941)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5032 (1.6573)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [265]  [2400/2502]  eta: 0:00:34  lr: 0.000145  min_lr: 0.000145  loss: 2.8914 (2.7953)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5284 (1.6598)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [265]  [2501/2502]  eta: 0:00:00  lr: 0.000145  min_lr: 0.000145  loss: 2.9140 (2.7943)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5740 (1.6600)  time: 0.3031  data: 0.0011  max mem: 27255
Epoch: [265] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.000145  min_lr: 0.000145  loss: 2.9140 (2.8064)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5740 (1.6600)
Test:  [ 0/50]  eta: 0:03:05  loss: 0.4878 (0.4878)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.7193  data: 3.5263  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 0.7028 (0.7468)  acc1: 90.0000 (88.6546)  acc5: 98.8000 (98.0364)  time: 0.5005  data: 0.3324  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7615 (0.7783)  acc1: 84.4000 (87.1238)  acc5: 98.4000 (98.0762)  time: 0.1851  data: 0.0151  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9231 (0.8685)  acc1: 83.2000 (85.2903)  acc5: 96.8000 (97.1355)  time: 0.1807  data: 0.0109  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0688 (0.9183)  acc1: 79.6000 (83.8049)  acc5: 94.4000 (96.6634)  time: 0.2181  data: 0.0508  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0688 (0.9380)  acc1: 78.4000 (83.1440)  acc5: 95.6000 (96.5920)  time: 0.2274  data: 0.0605  max mem: 27255
Test: Total time: 0:00:13 (0.2717 s / it)
* Acc@1 83.368 Acc@5 96.620 loss 0.933
Accuracy of the model on the 50000 test images: 83.4%
Max accuracy: 83.37%
Epoch: [266]  [   0/2502]  eta: 1:30:44  lr: 0.000145  min_lr: 0.000145  loss: 3.1051 (3.1051)  weight_decay: 0.0500 (0.0500)  time: 2.1760  data: 1.8197  max mem: 27255
Epoch: [266]  [ 200/2502]  eta: 0:13:09  lr: 0.000144  min_lr: 0.000144  loss: 2.9596 (2.8607)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6569 (1.7437)  time: 0.3350  data: 0.0005  max mem: 27255
Epoch: [266]  [ 400/2502]  eta: 0:11:53  lr: 0.000143  min_lr: 0.000143  loss: 2.9962 (2.8321)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4979 (1.6735)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [266]  [ 600/2502]  eta: 0:10:41  lr: 0.000143  min_lr: 0.000143  loss: 2.9401 (2.8285)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5524 (1.6600)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [266]  [ 800/2502]  eta: 0:09:32  lr: 0.000142  min_lr: 0.000142  loss: 2.8347 (2.8226)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6246 (1.6662)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [266]  [1000/2502]  eta: 0:08:24  lr: 0.000141  min_lr: 0.000141  loss: 2.9574 (2.8156)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5773 (1.6687)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [266]  [1200/2502]  eta: 0:07:16  lr: 0.000141  min_lr: 0.000141  loss: 2.8152 (2.8181)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6975 (1.6706)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [266]  [1400/2502]  eta: 0:06:09  lr: 0.000140  min_lr: 0.000140  loss: 2.8160 (2.8099)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5975 (1.6643)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [266]  [1600/2502]  eta: 0:05:02  lr: 0.000139  min_lr: 0.000139  loss: 2.9522 (2.8078)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5703 (1.6652)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [266]  [1800/2502]  eta: 0:03:55  lr: 0.000139  min_lr: 0.000139  loss: 2.9980 (2.8084)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7095 (1.6684)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [266]  [2000/2502]  eta: 0:02:48  lr: 0.000138  min_lr: 0.000138  loss: 2.8013 (2.8029)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5204 (1.6702)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [266]  [2200/2502]  eta: 0:01:41  lr: 0.000137  min_lr: 0.000137  loss: 2.9398 (2.8069)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7086 (1.6735)  time: 0.3355  data: 0.0005  max mem: 27255
Epoch: [266]  [2400/2502]  eta: 0:00:34  lr: 0.000137  min_lr: 0.000137  loss: 3.0114 (2.8068)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5708 (1.6713)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [266]  [2501/2502]  eta: 0:00:00  lr: 0.000137  min_lr: 0.000137  loss: 2.8940 (2.8086)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5389 (1.6683)  time: 0.3008  data: 0.0008  max mem: 27255
Epoch: [266] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.000137  min_lr: 0.000137  loss: 2.8940 (2.8033)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5389 (1.6683)
Test:  [ 0/50]  eta: 0:02:15  loss: 0.5056 (0.5056)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.7177  data: 2.5274  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7546 (0.7602)  acc1: 89.6000 (88.2182)  acc5: 98.8000 (98.1091)  time: 0.4278  data: 0.2613  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7813 (0.7907)  acc1: 86.0000 (87.1619)  acc5: 98.0000 (98.0571)  time: 0.2088  data: 0.0448  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9258 (0.8796)  acc1: 83.6000 (85.2645)  acc5: 96.8000 (97.1097)  time: 0.2495  data: 0.0855  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0651 (0.9273)  acc1: 79.6000 (83.8439)  acc5: 95.2000 (96.5756)  time: 0.2576  data: 0.0938  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0645 (0.9479)  acc1: 78.8000 (83.1920)  acc5: 95.6000 (96.5440)  time: 0.1993  data: 0.0357  max mem: 27255
Test: Total time: 0:00:13 (0.2725 s / it)
* Acc@1 83.364 Acc@5 96.604 loss 0.941
Accuracy of the model on the 50000 test images: 83.4%
Max accuracy: 83.37%
Epoch: [267]  [   0/2502]  eta: 1:21:40  lr: 0.000136  min_lr: 0.000136  loss: 1.7935 (1.7935)  weight_decay: 0.0500 (0.0500)  time: 1.9587  data: 1.4679  max mem: 27255
Epoch: [267]  [ 200/2502]  eta: 0:13:08  lr: 0.000136  min_lr: 0.000136  loss: 2.9994 (2.7735)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6544 (1.7001)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [267]  [ 400/2502]  eta: 0:11:51  lr: 0.000135  min_lr: 0.000135  loss: 2.9745 (2.7805)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6234 (1.6688)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [267]  [ 600/2502]  eta: 0:10:40  lr: 0.000135  min_lr: 0.000135  loss: 2.7442 (2.7669)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4860 (1.6460)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [267]  [ 800/2502]  eta: 0:09:32  lr: 0.000134  min_lr: 0.000134  loss: 2.8754 (2.7709)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6307 (1.6386)  time: 0.3371  data: 0.0005  max mem: 27255
Epoch: [267]  [1000/2502]  eta: 0:08:24  lr: 0.000133  min_lr: 0.000133  loss: 2.8382 (2.7700)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5323 (1.6484)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [267]  [1200/2502]  eta: 0:07:17  lr: 0.000133  min_lr: 0.000133  loss: 2.8555 (2.7698)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6332 (1.6458)  time: 0.3330  data: 0.0005  max mem: 27255
Epoch: [267]  [1400/2502]  eta: 0:06:09  lr: 0.000132  min_lr: 0.000132  loss: 2.9597 (2.7788)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5721 (1.6419)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [267]  [1600/2502]  eta: 0:05:02  lr: 0.000131  min_lr: 0.000131  loss: 3.0150 (2.7726)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6992 (1.6430)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [267]  [1800/2502]  eta: 0:03:55  lr: 0.000131  min_lr: 0.000131  loss: 2.9796 (2.7794)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5427 (1.6384)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [267]  [2000/2502]  eta: 0:02:48  lr: 0.000130  min_lr: 0.000130  loss: 2.7838 (2.7773)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6120 (1.6432)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [267]  [2200/2502]  eta: 0:01:41  lr: 0.000129  min_lr: 0.000129  loss: 2.7898 (2.7791)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5785 (1.6438)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [267]  [2400/2502]  eta: 0:00:34  lr: 0.000129  min_lr: 0.000129  loss: 2.6018 (2.7850)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4708 (1.6401)  time: 0.3362  data: 0.0004  max mem: 27255
Epoch: [267]  [2501/2502]  eta: 0:00:00  lr: 0.000129  min_lr: 0.000129  loss: 2.9546 (2.7840)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5343 (1.6389)  time: 0.3000  data: 0.0009  max mem: 27255
Epoch: [267] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.000129  min_lr: 0.000129  loss: 2.9546 (2.7948)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5343 (1.6389)
Test:  [ 0/50]  eta: 0:02:11  loss: 0.5449 (0.5449)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.6385  data: 2.4488  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.7969 (0.7927)  acc1: 88.8000 (88.2909)  acc5: 98.4000 (98.0000)  time: 0.4236  data: 0.2572  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8203 (0.8194)  acc1: 85.2000 (86.9333)  acc5: 98.4000 (97.9810)  time: 0.2240  data: 0.0599  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9531 (0.9038)  acc1: 82.4000 (84.9936)  acc5: 96.4000 (96.9290)  time: 0.2136  data: 0.0495  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0905 (0.9491)  acc1: 79.6000 (83.7073)  acc5: 94.0000 (96.4781)  time: 0.2000  data: 0.0361  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0905 (0.9674)  acc1: 78.8000 (83.0320)  acc5: 95.6000 (96.4240)  time: 0.2025  data: 0.0387  max mem: 27255
Test: Total time: 0:00:12 (0.2584 s / it)
* Acc@1 83.242 Acc@5 96.598 loss 0.959
Accuracy of the model on the 50000 test images: 83.2%
Max accuracy: 83.37%
Epoch: [268]  [   0/2502]  eta: 1:31:29  lr: 0.000128  min_lr: 0.000128  loss: 2.9720 (2.9720)  weight_decay: 0.0500 (0.0500)  time: 2.1940  data: 1.8552  max mem: 27255
Epoch: [268]  [ 200/2502]  eta: 0:13:11  lr: 0.000128  min_lr: 0.000128  loss: 3.0683 (2.8005)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7376 (1.6874)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [268]  [ 400/2502]  eta: 0:11:52  lr: 0.000127  min_lr: 0.000127  loss: 2.7175 (2.7798)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5186 (inf)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [268]  [ 600/2502]  eta: 0:10:42  lr: 0.000127  min_lr: 0.000127  loss: 2.9776 (2.7879)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5027 (inf)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [268]  [ 800/2502]  eta: 0:09:33  lr: 0.000126  min_lr: 0.000126  loss: 2.8775 (2.7940)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7358 (inf)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [268]  [1000/2502]  eta: 0:08:25  lr: 0.000125  min_lr: 0.000125  loss: 2.7144 (2.7866)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5433 (inf)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [268]  [1200/2502]  eta: 0:07:17  lr: 0.000125  min_lr: 0.000125  loss: 2.8802 (2.7922)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6606 (inf)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [268]  [1400/2502]  eta: 0:06:10  lr: 0.000124  min_lr: 0.000124  loss: 2.9833 (2.7897)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7147 (inf)  time: 0.3362  data: 0.0004  max mem: 27255
Epoch: [268]  [1600/2502]  eta: 0:05:03  lr: 0.000124  min_lr: 0.000124  loss: 2.7773 (2.7901)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6176 (inf)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [268]  [1800/2502]  eta: 0:03:55  lr: 0.000123  min_lr: 0.000123  loss: 3.0679 (2.7958)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5772 (inf)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [268]  [2000/2502]  eta: 0:02:48  lr: 0.000122  min_lr: 0.000122  loss: 2.9088 (2.7980)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5849 (inf)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [268]  [2200/2502]  eta: 0:01:41  lr: 0.000122  min_lr: 0.000122  loss: 2.8781 (2.7978)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6452 (inf)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [268]  [2400/2502]  eta: 0:00:34  lr: 0.000121  min_lr: 0.000121  loss: 2.6136 (2.7965)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6373 (inf)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [268]  [2501/2502]  eta: 0:00:00  lr: 0.000121  min_lr: 0.000121  loss: 2.8988 (2.7935)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6509 (inf)  time: 0.3006  data: 0.0009  max mem: 27255
Epoch: [268] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.000121  min_lr: 0.000121  loss: 2.8988 (2.7901)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6509 (inf)
Test:  [ 0/50]  eta: 0:02:22  loss: 0.6135 (0.6135)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 2.8406  data: 2.6459  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8933 (0.8853)  acc1: 88.4000 (87.8909)  acc5: 98.8000 (97.8545)  time: 0.4297  data: 0.2629  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8933 (0.9032)  acc1: 84.8000 (86.7619)  acc5: 98.4000 (97.9429)  time: 0.2162  data: 0.0482  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0607 (0.9988)  acc1: 82.4000 (84.9677)  acc5: 96.0000 (96.8258)  time: 0.2515  data: 0.0811  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1931 (1.0476)  acc1: 79.6000 (83.6781)  acc5: 94.8000 (96.4781)  time: 0.2323  data: 0.0660  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1689 (1.0692)  acc1: 79.2000 (82.9600)  acc5: 95.6000 (96.4000)  time: 0.1846  data: 0.0209  max mem: 27255
Test: Total time: 0:00:13 (0.2675 s / it)
* Acc@1 83.222 Acc@5 96.558 loss 1.060
Accuracy of the model on the 50000 test images: 83.2%
Max accuracy: 83.37%
Epoch: [269]  [   0/2502]  eta: 1:34:55  lr: 0.000121  min_lr: 0.000121  loss: 2.9666 (2.9666)  weight_decay: 0.0500 (0.0500)  time: 2.2765  data: 1.9293  max mem: 27255
Epoch: [269]  [ 200/2502]  eta: 0:13:11  lr: 0.000120  min_lr: 0.000120  loss: 3.0216 (2.8072)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4882 (1.6367)  time: 0.3341  data: 0.0003  max mem: 27255
Epoch: [269]  [ 400/2502]  eta: 0:11:53  lr: 0.000120  min_lr: 0.000120  loss: 2.7626 (2.7964)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6613 (1.6414)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [269]  [ 600/2502]  eta: 0:10:42  lr: 0.000119  min_lr: 0.000119  loss: 2.9241 (2.8011)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6111 (1.6705)  time: 0.3350  data: 0.0003  max mem: 27255
Epoch: [269]  [ 800/2502]  eta: 0:09:33  lr: 0.000118  min_lr: 0.000118  loss: 2.9980 (2.8049)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5802 (1.6628)  time: 0.3364  data: 0.0004  max mem: 27255
Epoch: [269]  [1000/2502]  eta: 0:08:25  lr: 0.000118  min_lr: 0.000118  loss: 2.8762 (2.8006)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6865 (1.6741)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [269]  [1200/2502]  eta: 0:07:17  lr: 0.000117  min_lr: 0.000117  loss: 2.8758 (2.7951)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5547 (1.6824)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [269]  [1400/2502]  eta: 0:06:10  lr: 0.000116  min_lr: 0.000116  loss: 2.6927 (2.7980)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5268 (1.6827)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [269]  [1600/2502]  eta: 0:05:02  lr: 0.000116  min_lr: 0.000116  loss: 2.8559 (2.7983)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6561 (1.6896)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [269]  [1800/2502]  eta: 0:03:55  lr: 0.000115  min_lr: 0.000115  loss: 2.8469 (2.7962)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5382 (1.6872)  time: 0.3368  data: 0.0003  max mem: 27255
Epoch: [269]  [2000/2502]  eta: 0:02:48  lr: 0.000115  min_lr: 0.000115  loss: 2.9979 (2.7978)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5933 (1.6844)  time: 0.3353  data: 0.0003  max mem: 27255
Epoch: [269]  [2200/2502]  eta: 0:01:41  lr: 0.000114  min_lr: 0.000114  loss: 2.7614 (2.7935)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6240 (1.6839)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [269]  [2400/2502]  eta: 0:00:34  lr: 0.000114  min_lr: 0.000114  loss: 2.8558 (2.7938)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5167 (1.6790)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [269]  [2501/2502]  eta: 0:00:00  lr: 0.000113  min_lr: 0.000113  loss: 2.9822 (2.7942)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6675 (1.6808)  time: 0.3018  data: 0.0007  max mem: 27255
Epoch: [269] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.000113  min_lr: 0.000113  loss: 2.9822 (2.7834)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6675 (1.6808)
Test:  [ 0/50]  eta: 0:02:32  loss: 0.6166 (0.6166)  acc1: 94.4000 (94.4000)  acc5: 98.8000 (98.8000)  time: 3.0515  data: 2.8496  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.8744 (0.8753)  acc1: 90.0000 (88.7273)  acc5: 98.0000 (97.8909)  time: 0.4501  data: 0.2823  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8893 (0.9071)  acc1: 85.2000 (87.2762)  acc5: 98.0000 (97.9810)  time: 0.1927  data: 0.0286  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0516 (0.9997)  acc1: 82.4000 (85.2258)  acc5: 97.2000 (96.9548)  time: 0.1968  data: 0.0306  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1799 (1.0490)  acc1: 79.6000 (83.7561)  acc5: 94.4000 (96.5854)  time: 0.1926  data: 0.0265  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1799 (1.0699)  acc1: 79.2000 (83.0400)  acc5: 95.6000 (96.5360)  time: 0.1877  data: 0.0239  max mem: 27255
Test: Total time: 0:00:12 (0.2464 s / it)
* Acc@1 83.306 Acc@5 96.590 loss 1.064
Accuracy of the model on the 50000 test images: 83.3%
Max accuracy: 83.37%
Epoch: [270]  [   0/2502]  eta: 1:39:31  lr: 0.000113  min_lr: 0.000113  loss: 2.9086 (2.9086)  weight_decay: 0.0500 (0.0500)  time: 2.3865  data: 2.0276  max mem: 27255
Epoch: [270]  [ 200/2502]  eta: 0:13:12  lr: 0.000113  min_lr: 0.000113  loss: 2.8413 (2.8350)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6250 (1.7539)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [270]  [ 400/2502]  eta: 0:11:52  lr: 0.000112  min_lr: 0.000112  loss: 2.6074 (2.7836)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6225 (1.7451)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [270]  [ 600/2502]  eta: 0:10:41  lr: 0.000111  min_lr: 0.000111  loss: 2.8798 (2.7717)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6140 (1.7392)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [270]  [ 800/2502]  eta: 0:09:33  lr: 0.000111  min_lr: 0.000111  loss: 2.9545 (2.7597)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7143 (1.7467)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [270]  [1000/2502]  eta: 0:08:25  lr: 0.000110  min_lr: 0.000110  loss: 2.9528 (2.7702)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7122 (1.7480)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [270]  [1200/2502]  eta: 0:07:17  lr: 0.000110  min_lr: 0.000110  loss: 2.9697 (2.7760)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5648 (1.7294)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [270]  [1400/2502]  eta: 0:06:10  lr: 0.000109  min_lr: 0.000109  loss: 2.9668 (2.7769)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6464 (1.7224)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [270]  [1600/2502]  eta: 0:05:02  lr: 0.000109  min_lr: 0.000109  loss: 2.6098 (2.7791)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6720 (1.7209)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [270]  [1800/2502]  eta: 0:03:55  lr: 0.000108  min_lr: 0.000108  loss: 2.9214 (2.7786)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7869 (1.7213)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [270]  [2000/2502]  eta: 0:02:48  lr: 0.000107  min_lr: 0.000107  loss: 2.7146 (2.7816)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6039 (1.7154)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [270]  [2200/2502]  eta: 0:01:41  lr: 0.000107  min_lr: 0.000107  loss: 2.9408 (2.7808)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7220 (1.7152)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [270]  [2400/2502]  eta: 0:00:34  lr: 0.000106  min_lr: 0.000106  loss: 2.9025 (2.7806)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6105 (1.7074)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [270]  [2501/2502]  eta: 0:00:00  lr: 0.000106  min_lr: 0.000106  loss: 2.9110 (2.7811)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6879 (1.7079)  time: 0.3004  data: 0.0009  max mem: 27255
Epoch: [270] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.000106  min_lr: 0.000106  loss: 2.9110 (2.7853)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6879 (1.7079)
Test:  [ 0/50]  eta: 0:02:23  loss: 0.4543 (0.4543)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 2.8733  data: 2.6699  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7350 (0.7348)  acc1: 89.6000 (88.8000)  acc5: 98.8000 (98.0000)  time: 0.4548  data: 0.2871  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7450 (0.7664)  acc1: 86.0000 (87.5048)  acc5: 98.4000 (98.0571)  time: 0.1941  data: 0.0300  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9076 (0.8565)  acc1: 84.0000 (85.4194)  acc5: 97.2000 (97.0710)  time: 0.1987  data: 0.0347  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0345 (0.9057)  acc1: 80.8000 (84.0000)  acc5: 94.8000 (96.6244)  time: 0.2047  data: 0.0408  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0345 (0.9248)  acc1: 78.4000 (83.2880)  acc5: 95.6000 (96.5200)  time: 0.1756  data: 0.0118  max mem: 27255
Test: Total time: 0:00:12 (0.2487 s / it)
* Acc@1 83.546 Acc@5 96.644 loss 0.916
Accuracy of the model on the 50000 test images: 83.5%
Max accuracy: 83.55%
Epoch: [271]  [   0/2502]  eta: 1:35:57  lr: 0.000106  min_lr: 0.000106  loss: 3.1169 (3.1169)  weight_decay: 0.0500 (0.0500)  time: 2.3013  data: 1.9581  max mem: 27255
Epoch: [271]  [ 200/2502]  eta: 0:13:12  lr: 0.000105  min_lr: 0.000105  loss: 2.8277 (2.7945)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6929 (1.7648)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [271]  [ 400/2502]  eta: 0:11:55  lr: 0.000105  min_lr: 0.000105  loss: 2.8399 (2.7587)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5991 (1.7073)  time: 0.3521  data: 0.0013  max mem: 27255
Epoch: [271]  [ 600/2502]  eta: 0:10:45  lr: 0.000104  min_lr: 0.000104  loss: 2.8939 (2.7668)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7264 (1.7271)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [271]  [ 800/2502]  eta: 0:09:35  lr: 0.000104  min_lr: 0.000104  loss: 2.8988 (2.7684)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5967 (1.7224)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [271]  [1000/2502]  eta: 0:08:26  lr: 0.000103  min_lr: 0.000103  loss: 2.8193 (2.7755)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6158 (1.7081)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [271]  [1200/2502]  eta: 0:07:18  lr: 0.000102  min_lr: 0.000102  loss: 2.8110 (2.7779)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6771 (1.7098)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [271]  [1400/2502]  eta: 0:06:10  lr: 0.000102  min_lr: 0.000102  loss: 2.6641 (2.7694)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5971 (1.7037)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [271]  [1600/2502]  eta: 0:05:03  lr: 0.000101  min_lr: 0.000101  loss: 2.9968 (2.7729)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6488 (1.7024)  time: 0.3355  data: 0.0005  max mem: 27255
Epoch: [271]  [1800/2502]  eta: 0:03:55  lr: 0.000101  min_lr: 0.000101  loss: 2.7374 (2.7729)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6351 (1.6982)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [271]  [2000/2502]  eta: 0:02:48  lr: 0.000100  min_lr: 0.000100  loss: 2.9531 (2.7765)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5603 (1.6941)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [271]  [2200/2502]  eta: 0:01:41  lr: 0.000100  min_lr: 0.000100  loss: 2.9738 (2.7813)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6782 (1.6951)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [271]  [2400/2502]  eta: 0:00:34  lr: 0.000099  min_lr: 0.000099  loss: 2.8885 (2.7830)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6124 (1.6956)  time: 0.3390  data: 0.0004  max mem: 27255
Epoch: [271]  [2501/2502]  eta: 0:00:00  lr: 0.000099  min_lr: 0.000099  loss: 2.9454 (2.7835)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6329 (1.6985)  time: 0.2999  data: 0.0007  max mem: 27255
Epoch: [271] Total time: 0:13:59 (0.3356 s / it)
Averaged stats: lr: 0.000099  min_lr: 0.000099  loss: 2.9454 (2.7777)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6329 (1.6985)
Test:  [ 0/50]  eta: 0:02:17  loss: 0.5648 (0.5648)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.7431  data: 2.5506  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8198 (0.8124)  acc1: 90.0000 (88.7273)  acc5: 98.4000 (97.9636)  time: 0.4451  data: 0.2786  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8357 (0.8383)  acc1: 85.2000 (87.2571)  acc5: 98.0000 (98.0191)  time: 0.2005  data: 0.0365  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9525 (0.9252)  acc1: 82.8000 (85.2516)  acc5: 97.2000 (97.0323)  time: 0.2338  data: 0.0698  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1249 (0.9697)  acc1: 79.6000 (83.9610)  acc5: 94.8000 (96.6342)  time: 0.2438  data: 0.0800  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1028 (0.9899)  acc1: 79.2000 (83.2400)  acc5: 95.6000 (96.5760)  time: 0.1847  data: 0.0211  max mem: 27255
Test: Total time: 0:00:13 (0.2640 s / it)
* Acc@1 83.512 Acc@5 96.626 loss 0.984
Accuracy of the model on the 50000 test images: 83.5%
Max accuracy: 83.55%
Epoch: [272]  [   0/2502]  eta: 1:29:17  lr: 0.000099  min_lr: 0.000099  loss: 3.0152 (3.0152)  weight_decay: 0.0500 (0.0500)  time: 2.1412  data: 1.3599  max mem: 27255
Epoch: [272]  [ 200/2502]  eta: 0:13:08  lr: 0.000098  min_lr: 0.000098  loss: 2.9436 (2.7879)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5237 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [272]  [ 400/2502]  eta: 0:11:51  lr: 0.000098  min_lr: 0.000098  loss: 3.0052 (2.8024)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7370 (inf)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [272]  [ 600/2502]  eta: 0:10:40  lr: 0.000097  min_lr: 0.000097  loss: 2.7743 (2.8081)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6567 (inf)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [272]  [ 800/2502]  eta: 0:09:32  lr: 0.000097  min_lr: 0.000097  loss: 2.9571 (2.8052)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5659 (inf)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [272]  [1000/2502]  eta: 0:08:24  lr: 0.000096  min_lr: 0.000096  loss: 2.8718 (2.8001)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5222 (inf)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [272]  [1200/2502]  eta: 0:07:16  lr: 0.000096  min_lr: 0.000096  loss: 2.5581 (2.7969)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6710 (inf)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [272]  [1400/2502]  eta: 0:06:09  lr: 0.000095  min_lr: 0.000095  loss: 2.8527 (2.7943)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6609 (inf)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [272]  [1600/2502]  eta: 0:05:02  lr: 0.000094  min_lr: 0.000094  loss: 2.9031 (2.7891)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6601 (inf)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [272]  [1800/2502]  eta: 0:03:55  lr: 0.000094  min_lr: 0.000094  loss: 2.8511 (2.7830)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5666 (inf)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [272]  [2000/2502]  eta: 0:02:48  lr: 0.000093  min_lr: 0.000093  loss: 2.7353 (2.7850)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7139 (inf)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [272]  [2200/2502]  eta: 0:01:41  lr: 0.000093  min_lr: 0.000093  loss: 2.9353 (2.7812)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7062 (inf)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [272]  [2400/2502]  eta: 0:00:34  lr: 0.000092  min_lr: 0.000092  loss: 2.9963 (2.7851)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7541 (inf)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [272]  [2501/2502]  eta: 0:00:00  lr: 0.000092  min_lr: 0.000092  loss: 2.9448 (2.7887)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7989 (inf)  time: 0.3010  data: 0.0008  max mem: 27255
Epoch: [272] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.000092  min_lr: 0.000092  loss: 2.9448 (2.7817)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7989 (inf)
Test:  [ 0/50]  eta: 0:02:54  loss: 0.5267 (0.5267)  acc1: 93.6000 (93.6000)  acc5: 99.2000 (99.2000)  time: 3.4931  data: 3.2999  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7949 (0.7894)  acc1: 89.2000 (88.5818)  acc5: 98.0000 (97.9273)  time: 0.4669  data: 0.3004  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7973 (0.8137)  acc1: 85.2000 (87.2571)  acc5: 98.0000 (97.9810)  time: 0.1644  data: 0.0005  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9388 (0.9039)  acc1: 82.4000 (85.3936)  acc5: 97.2000 (97.0710)  time: 0.1860  data: 0.0220  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0980 (0.9527)  acc1: 80.8000 (84.0098)  acc5: 94.8000 (96.6244)  time: 0.2254  data: 0.0615  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0980 (0.9728)  acc1: 79.2000 (83.2960)  acc5: 95.6000 (96.5600)  time: 0.2046  data: 0.0408  max mem: 27255
Test: Total time: 0:00:12 (0.2573 s / it)
* Acc@1 83.446 Acc@5 96.636 loss 0.967
Accuracy of the model on the 50000 test images: 83.4%
Max accuracy: 83.55%
Epoch: [273]  [   0/2502]  eta: 1:28:19  lr: 0.000092  min_lr: 0.000092  loss: 2.4321 (2.4321)  weight_decay: 0.0500 (0.0500)  time: 2.1182  data: 1.7228  max mem: 27255
Epoch: [273]  [ 200/2502]  eta: 0:13:08  lr: 0.000092  min_lr: 0.000092  loss: 2.7693 (2.7422)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7892 (1.7696)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [273]  [ 400/2502]  eta: 0:11:51  lr: 0.000091  min_lr: 0.000091  loss: 2.7626 (2.7547)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5459 (1.7178)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [273]  [ 600/2502]  eta: 0:10:41  lr: 0.000090  min_lr: 0.000090  loss: 3.0122 (2.7595)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6085 (1.7253)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [273]  [ 800/2502]  eta: 0:09:32  lr: 0.000090  min_lr: 0.000090  loss: 2.8292 (2.7560)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5865 (1.7281)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [273]  [1000/2502]  eta: 0:08:25  lr: 0.000089  min_lr: 0.000089  loss: 2.8134 (2.7597)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5241 (1.7024)  time: 0.3353  data: 0.0005  max mem: 27255
Epoch: [273]  [1200/2502]  eta: 0:07:17  lr: 0.000089  min_lr: 0.000089  loss: 2.9278 (2.7653)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6400 (1.7108)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [273]  [1400/2502]  eta: 0:06:10  lr: 0.000088  min_lr: 0.000088  loss: 2.8306 (2.7644)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6252 (1.7102)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [273]  [1600/2502]  eta: 0:05:02  lr: 0.000088  min_lr: 0.000088  loss: 2.8893 (2.7737)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6542 (1.7089)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [273]  [1800/2502]  eta: 0:03:55  lr: 0.000087  min_lr: 0.000087  loss: 2.6566 (2.7628)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7924 (1.7095)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [273]  [2000/2502]  eta: 0:02:48  lr: 0.000087  min_lr: 0.000087  loss: 2.9770 (2.7659)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6763 (1.7075)  time: 0.3361  data: 0.0004  max mem: 27255
Epoch: [273]  [2200/2502]  eta: 0:01:41  lr: 0.000086  min_lr: 0.000086  loss: 2.7554 (2.7653)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5387 (1.7010)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [273]  [2400/2502]  eta: 0:00:34  lr: 0.000086  min_lr: 0.000086  loss: 2.8575 (2.7697)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7245 (1.6980)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [273]  [2501/2502]  eta: 0:00:00  lr: 0.000085  min_lr: 0.000085  loss: 2.8962 (2.7711)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5644 (1.6955)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [273] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000085  min_lr: 0.000085  loss: 2.8962 (2.7755)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5644 (1.6955)
Test:  [ 0/50]  eta: 0:02:10  loss: 0.5286 (0.5286)  acc1: 93.6000 (93.6000)  acc5: 99.2000 (99.2000)  time: 2.6186  data: 2.4224  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7695 (0.7805)  acc1: 89.2000 (88.2182)  acc5: 98.8000 (97.9273)  time: 0.4275  data: 0.2608  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7940 (0.8056)  acc1: 85.2000 (87.0667)  acc5: 98.0000 (97.9619)  time: 0.2406  data: 0.0768  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9347 (0.8954)  acc1: 82.8000 (85.3419)  acc5: 96.8000 (97.0968)  time: 0.2185  data: 0.0548  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0904 (0.9459)  acc1: 80.0000 (83.9024)  acc5: 94.8000 (96.6927)  time: 0.2107  data: 0.0471  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0904 (0.9681)  acc1: 78.8000 (83.1760)  acc5: 95.6000 (96.6000)  time: 0.2104  data: 0.0469  max mem: 27255
Test: Total time: 0:00:13 (0.2643 s / it)
* Acc@1 83.512 Acc@5 96.632 loss 0.961
Accuracy of the model on the 50000 test images: 83.5%
Max accuracy: 83.55%
Epoch: [274]  [   0/2502]  eta: 1:19:00  lr: 0.000085  min_lr: 0.000085  loss: 3.0380 (3.0380)  weight_decay: 0.0500 (0.0500)  time: 1.8947  data: 1.5591  max mem: 27255
Epoch: [274]  [ 200/2502]  eta: 0:13:14  lr: 0.000085  min_lr: 0.000085  loss: 2.9017 (2.7169)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5960 (1.6521)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [274]  [ 400/2502]  eta: 0:11:53  lr: 0.000084  min_lr: 0.000084  loss: 3.0121 (2.7487)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6211 (1.6706)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [274]  [ 600/2502]  eta: 0:10:42  lr: 0.000084  min_lr: 0.000084  loss: 2.7056 (2.7426)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6305 (1.6826)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [274]  [ 800/2502]  eta: 0:09:33  lr: 0.000083  min_lr: 0.000083  loss: 2.7925 (2.7576)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5958 (1.6808)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [274]  [1000/2502]  eta: 0:08:25  lr: 0.000083  min_lr: 0.000083  loss: 2.8654 (2.7727)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7255 (1.7003)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [274]  [1200/2502]  eta: 0:07:17  lr: 0.000082  min_lr: 0.000082  loss: 3.0845 (2.7781)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6461 (1.7073)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [274]  [1400/2502]  eta: 0:06:10  lr: 0.000082  min_lr: 0.000082  loss: 2.8503 (2.7752)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6766 (1.7023)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [274]  [1600/2502]  eta: 0:05:02  lr: 0.000081  min_lr: 0.000081  loss: 2.9517 (2.7700)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7252 (1.7134)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [274]  [1800/2502]  eta: 0:03:55  lr: 0.000081  min_lr: 0.000081  loss: 2.9039 (2.7660)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7528 (1.7185)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [274]  [2000/2502]  eta: 0:02:48  lr: 0.000080  min_lr: 0.000080  loss: 2.9022 (2.7599)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6033 (1.7174)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [274]  [2200/2502]  eta: 0:01:41  lr: 0.000080  min_lr: 0.000080  loss: 2.8371 (2.7604)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5828 (1.7119)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [274]  [2400/2502]  eta: 0:00:34  lr: 0.000079  min_lr: 0.000079  loss: 2.8019 (2.7659)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6384 (1.7172)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [274]  [2501/2502]  eta: 0:00:00  lr: 0.000079  min_lr: 0.000079  loss: 3.0062 (2.7687)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7234 (1.7203)  time: 0.3000  data: 0.0009  max mem: 27255
Epoch: [274] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.000079  min_lr: 0.000079  loss: 3.0062 (2.7694)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7234 (1.7203)
Test:  [ 0/50]  eta: 0:02:15  loss: 0.5930 (0.5930)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 2.7006  data: 2.5031  max mem: 27255
Test:  [10/50]  eta: 0:00:15  loss: 0.8750 (0.8556)  acc1: 89.2000 (88.2182)  acc5: 98.0000 (98.0000)  time: 0.3951  data: 0.2280  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8816 (0.8812)  acc1: 84.4000 (87.0476)  acc5: 98.0000 (98.0952)  time: 0.2149  data: 0.0510  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0036 (0.9702)  acc1: 82.4000 (85.0839)  acc5: 96.8000 (97.0710)  time: 0.2430  data: 0.0783  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1529 (1.0170)  acc1: 79.6000 (83.7854)  acc5: 94.8000 (96.6439)  time: 0.1974  data: 0.0324  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1529 (1.0394)  acc1: 79.2000 (83.0400)  acc5: 95.6000 (96.5920)  time: 0.1751  data: 0.0097  max mem: 27255
Test: Total time: 0:00:12 (0.2525 s / it)
* Acc@1 83.332 Acc@5 96.596 loss 1.034
Accuracy of the model on the 50000 test images: 83.3%
Max accuracy: 83.55%
Epoch: [275]  [   0/2502]  eta: 1:34:04  lr: 0.000079  min_lr: 0.000079  loss: 2.7889 (2.7889)  weight_decay: 0.0500 (0.0500)  time: 2.2559  data: 1.9149  max mem: 27255
Epoch: [275]  [ 200/2502]  eta: 0:13:14  lr: 0.000079  min_lr: 0.000079  loss: 2.8563 (2.8040)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6320 (1.8033)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [275]  [ 400/2502]  eta: 0:11:54  lr: 0.000078  min_lr: 0.000078  loss: 2.8322 (2.7761)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6776 (1.7686)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [275]  [ 600/2502]  eta: 0:10:43  lr: 0.000078  min_lr: 0.000078  loss: 2.9470 (2.7694)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5099 (1.7317)  time: 0.3428  data: 0.0005  max mem: 27255
Epoch: [275]  [ 800/2502]  eta: 0:09:34  lr: 0.000077  min_lr: 0.000077  loss: 2.8481 (2.7595)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5931 (1.7262)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [275]  [1000/2502]  eta: 0:08:25  lr: 0.000077  min_lr: 0.000077  loss: 2.7134 (2.7559)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6041 (inf)  time: 0.3403  data: 0.0005  max mem: 27255
Epoch: [275]  [1200/2502]  eta: 0:07:18  lr: 0.000076  min_lr: 0.000076  loss: 2.8969 (2.7578)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7225 (inf)  time: 0.3411  data: 0.0006  max mem: 27255
Epoch: [275]  [1400/2502]  eta: 0:06:10  lr: 0.000076  min_lr: 0.000076  loss: 2.8954 (2.7591)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6626 (inf)  time: 0.3333  data: 0.0005  max mem: 27255
Epoch: [275]  [1600/2502]  eta: 0:05:03  lr: 0.000075  min_lr: 0.000075  loss: 2.9907 (2.7650)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6103 (inf)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [275]  [1800/2502]  eta: 0:03:55  lr: 0.000075  min_lr: 0.000075  loss: 2.9319 (2.7667)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5880 (inf)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [275]  [2000/2502]  eta: 0:02:48  lr: 0.000074  min_lr: 0.000074  loss: 3.0251 (2.7744)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6407 (inf)  time: 0.3402  data: 0.0004  max mem: 27255
Epoch: [275]  [2200/2502]  eta: 0:01:41  lr: 0.000074  min_lr: 0.000074  loss: 2.8692 (2.7750)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6986 (inf)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [275]  [2400/2502]  eta: 0:00:34  lr: 0.000073  min_lr: 0.000073  loss: 2.9338 (2.7782)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7252 (inf)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [275]  [2501/2502]  eta: 0:00:00  lr: 0.000073  min_lr: 0.000073  loss: 2.7502 (2.7785)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6392 (inf)  time: 0.3007  data: 0.0007  max mem: 27255
Epoch: [275] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000073  min_lr: 0.000073  loss: 2.7502 (2.7664)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6392 (inf)
Test:  [ 0/50]  eta: 0:02:42  loss: 0.5907 (0.5907)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.2552  data: 3.0517  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.8261 (0.8336)  acc1: 90.0000 (88.6182)  acc5: 98.4000 (97.9636)  time: 0.4765  data: 0.3089  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8529 (0.8585)  acc1: 85.2000 (87.3714)  acc5: 98.0000 (98.0381)  time: 0.2005  data: 0.0365  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0039 (0.9490)  acc1: 82.4000 (85.3290)  acc5: 96.8000 (96.9677)  time: 0.2213  data: 0.0574  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1488 (0.9970)  acc1: 80.0000 (83.9415)  acc5: 94.8000 (96.6049)  time: 0.2230  data: 0.0592  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1488 (1.0178)  acc1: 79.6000 (83.2800)  acc5: 95.6000 (96.5440)  time: 0.1848  data: 0.0211  max mem: 27255
Test: Total time: 0:00:13 (0.2661 s / it)
* Acc@1 83.454 Acc@5 96.586 loss 1.012
Accuracy of the model on the 50000 test images: 83.5%
Max accuracy: 83.55%
Epoch: [276]  [   0/2502]  eta: 1:18:18  lr: 0.000073  min_lr: 0.000073  loss: 1.7978 (1.7978)  weight_decay: 0.0500 (0.0500)  time: 1.8779  data: 1.4277  max mem: 27255
Epoch: [276]  [ 200/2502]  eta: 0:13:07  lr: 0.000073  min_lr: 0.000073  loss: 3.0449 (2.8051)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7068 (1.7340)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [276]  [ 400/2502]  eta: 0:11:51  lr: 0.000072  min_lr: 0.000072  loss: 2.7387 (2.7886)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6813 (1.7456)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [276]  [ 600/2502]  eta: 0:10:41  lr: 0.000072  min_lr: 0.000072  loss: 3.0013 (2.7691)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6286 (1.7309)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [276]  [ 800/2502]  eta: 0:09:32  lr: 0.000071  min_lr: 0.000071  loss: 2.6091 (2.7563)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6832 (1.7313)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [276]  [1000/2502]  eta: 0:08:24  lr: 0.000071  min_lr: 0.000071  loss: 2.8568 (2.7563)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6138 (1.7167)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [276]  [1200/2502]  eta: 0:07:17  lr: 0.000070  min_lr: 0.000070  loss: 2.8608 (2.7554)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8004 (1.7251)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [276]  [1400/2502]  eta: 0:06:09  lr: 0.000070  min_lr: 0.000070  loss: 3.1119 (2.7596)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6603 (1.7308)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [276]  [1600/2502]  eta: 0:05:02  lr: 0.000069  min_lr: 0.000069  loss: 3.0115 (2.7651)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7371 (1.7361)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [276]  [1800/2502]  eta: 0:03:55  lr: 0.000069  min_lr: 0.000069  loss: 2.7390 (2.7599)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6849 (1.7351)  time: 0.3367  data: 0.0004  max mem: 27255
Epoch: [276]  [2000/2502]  eta: 0:02:48  lr: 0.000068  min_lr: 0.000068  loss: 2.9870 (2.7602)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8007 (1.7373)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [276]  [2200/2502]  eta: 0:01:41  lr: 0.000068  min_lr: 0.000068  loss: 3.0163 (2.7613)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6503 (1.7464)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [276]  [2400/2502]  eta: 0:00:34  lr: 0.000067  min_lr: 0.000067  loss: 2.8577 (2.7622)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6140 (1.7483)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [276]  [2501/2502]  eta: 0:00:00  lr: 0.000067  min_lr: 0.000067  loss: 2.6508 (2.7615)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7229 (1.7488)  time: 0.3001  data: 0.0009  max mem: 27255
Epoch: [276] Total time: 0:13:59 (0.3355 s / it)
Averaged stats: lr: 0.000067  min_lr: 0.000067  loss: 2.6508 (2.7644)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7229 (1.7488)
Test:  [ 0/50]  eta: 0:02:33  loss: 0.4996 (0.4996)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.0766  data: 2.8706  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7490 (0.7566)  acc1: 89.2000 (88.7636)  acc5: 98.8000 (98.0000)  time: 0.4529  data: 0.2765  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7891 (0.7816)  acc1: 85.6000 (87.5619)  acc5: 98.0000 (98.0381)  time: 0.1914  data: 0.0228  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9018 (0.8709)  acc1: 83.2000 (85.5226)  acc5: 96.8000 (96.9936)  time: 0.2043  data: 0.0399  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0561 (0.9198)  acc1: 80.0000 (84.0683)  acc5: 94.4000 (96.6244)  time: 0.2070  data: 0.0405  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0561 (0.9405)  acc1: 79.2000 (83.3760)  acc5: 95.6000 (96.5520)  time: 0.2067  data: 0.0403  max mem: 27255
Test: Total time: 0:00:12 (0.2526 s / it)
* Acc@1 83.476 Acc@5 96.616 loss 0.935
Accuracy of the model on the 50000 test images: 83.5%
Max accuracy: 83.55%
Epoch: [277]  [   0/2502]  eta: 1:30:56  lr: 0.000067  min_lr: 0.000067  loss: 1.8740 (1.8740)  weight_decay: 0.0500 (0.0500)  time: 2.1810  data: 1.6245  max mem: 27255
Epoch: [277]  [ 200/2502]  eta: 0:13:11  lr: 0.000067  min_lr: 0.000067  loss: 2.6479 (2.7506)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5987 (1.7294)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [277]  [ 400/2502]  eta: 0:11:53  lr: 0.000066  min_lr: 0.000066  loss: 2.9517 (2.7555)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6262 (1.7342)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [277]  [ 600/2502]  eta: 0:10:41  lr: 0.000066  min_lr: 0.000066  loss: 2.5275 (2.7340)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6382 (1.7187)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [277]  [ 800/2502]  eta: 0:09:33  lr: 0.000065  min_lr: 0.000065  loss: 2.6109 (2.7393)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6160 (1.7153)  time: 0.3331  data: 0.0005  max mem: 27255
Epoch: [277]  [1000/2502]  eta: 0:08:25  lr: 0.000065  min_lr: 0.000065  loss: 3.0304 (2.7499)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6221 (1.7127)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [277]  [1200/2502]  eta: 0:07:17  lr: 0.000064  min_lr: 0.000064  loss: 2.7387 (2.7427)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7393 (1.7255)  time: 0.3332  data: 0.0005  max mem: 27255
Epoch: [277]  [1400/2502]  eta: 0:06:09  lr: 0.000064  min_lr: 0.000064  loss: 2.8960 (2.7425)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5646 (1.7170)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [277]  [1600/2502]  eta: 0:05:02  lr: 0.000064  min_lr: 0.000064  loss: 2.8983 (2.7449)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6645 (1.7222)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [277]  [1800/2502]  eta: 0:03:55  lr: 0.000063  min_lr: 0.000063  loss: 2.8615 (2.7445)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6534 (1.7149)  time: 0.3367  data: 0.0005  max mem: 27255
Epoch: [277]  [2000/2502]  eta: 0:02:48  lr: 0.000063  min_lr: 0.000063  loss: 2.8340 (2.7439)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6564 (nan)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [277]  [2200/2502]  eta: 0:01:41  lr: 0.000062  min_lr: 0.000062  loss: 2.9431 (2.7468)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6055 (nan)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [277]  [2400/2502]  eta: 0:00:34  lr: 0.000062  min_lr: 0.000062  loss: 2.5772 (2.7477)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5781 (nan)  time: 0.3362  data: 0.0004  max mem: 27255
Epoch: [277]  [2501/2502]  eta: 0:00:00  lr: 0.000062  min_lr: 0.000062  loss: 2.8842 (2.7482)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5483 (nan)  time: 0.3003  data: 0.0009  max mem: 27255
Epoch: [277] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.000062  min_lr: 0.000062  loss: 2.8842 (2.7562)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5483 (nan)
Test:  [ 0/50]  eta: 0:02:09  loss: 0.5238 (0.5238)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.5892  data: 2.3840  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7881 (0.7905)  acc1: 89.6000 (88.6545)  acc5: 98.4000 (97.8909)  time: 0.4487  data: 0.2809  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.7882 (0.8127)  acc1: 85.6000 (87.4667)  acc5: 98.0000 (98.0000)  time: 0.2595  data: 0.0955  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9294 (0.8982)  acc1: 82.8000 (85.4323)  acc5: 96.8000 (97.0839)  time: 0.2443  data: 0.0803  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0802 (0.9452)  acc1: 80.4000 (84.0781)  acc5: 94.8000 (96.6829)  time: 0.2052  data: 0.0413  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0802 (0.9653)  acc1: 80.0000 (83.4400)  acc5: 95.6000 (96.6080)  time: 0.1849  data: 0.0213  max mem: 27255
Test: Total time: 0:00:13 (0.2691 s / it)
* Acc@1 83.606 Acc@5 96.652 loss 0.959
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.61%
Epoch: [278]  [   0/2502]  eta: 1:27:34  lr: 0.000062  min_lr: 0.000062  loss: 2.4505 (2.4505)  weight_decay: 0.0500 (0.0500)  time: 2.1002  data: 1.7505  max mem: 27255
Epoch: [278]  [ 200/2502]  eta: 0:13:08  lr: 0.000061  min_lr: 0.000061  loss: 2.5816 (2.7450)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6750 (1.6814)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [278]  [ 400/2502]  eta: 0:11:52  lr: 0.000061  min_lr: 0.000061  loss: 3.0583 (2.7553)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6478 (1.7283)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [278]  [ 600/2502]  eta: 0:10:42  lr: 0.000060  min_lr: 0.000060  loss: 2.7837 (2.7586)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7176 (1.7223)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [278]  [ 800/2502]  eta: 0:09:33  lr: 0.000060  min_lr: 0.000060  loss: 2.9347 (2.7707)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7372 (1.7378)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [278]  [1000/2502]  eta: 0:08:25  lr: 0.000059  min_lr: 0.000059  loss: 2.8677 (2.7689)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6914 (1.7394)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [278]  [1200/2502]  eta: 0:07:17  lr: 0.000059  min_lr: 0.000059  loss: 2.9904 (2.7704)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6593 (1.7340)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [278]  [1400/2502]  eta: 0:06:10  lr: 0.000059  min_lr: 0.000059  loss: 2.7090 (2.7683)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6496 (1.7367)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [278]  [1600/2502]  eta: 0:05:02  lr: 0.000058  min_lr: 0.000058  loss: 2.6783 (2.7661)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6305 (1.7312)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [278]  [1800/2502]  eta: 0:03:55  lr: 0.000058  min_lr: 0.000058  loss: 2.4421 (2.7630)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7069 (1.7289)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [278]  [2000/2502]  eta: 0:02:48  lr: 0.000057  min_lr: 0.000057  loss: 2.7989 (2.7603)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7273 (1.7317)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [278]  [2200/2502]  eta: 0:01:41  lr: 0.000057  min_lr: 0.000057  loss: 2.9142 (2.7574)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6741 (1.7315)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [278]  [2400/2502]  eta: 0:00:34  lr: 0.000056  min_lr: 0.000056  loss: 2.9130 (2.7593)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6930 (1.7385)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [278]  [2501/2502]  eta: 0:00:00  lr: 0.000056  min_lr: 0.000056  loss: 2.7937 (2.7579)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7821 (1.7473)  time: 0.3000  data: 0.0007  max mem: 27255
Epoch: [278] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000056  min_lr: 0.000056  loss: 2.7937 (2.7513)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7821 (1.7473)
Test:  [ 0/50]  eta: 0:02:20  loss: 0.5071 (0.5071)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 2.8006  data: 2.6119  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7763 (0.7704)  acc1: 90.4000 (88.6909)  acc5: 98.4000 (98.0364)  time: 0.4415  data: 0.2753  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7819 (0.7969)  acc1: 85.2000 (87.4286)  acc5: 98.4000 (98.0381)  time: 0.2342  data: 0.0702  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9123 (0.8818)  acc1: 82.8000 (85.4323)  acc5: 96.8000 (97.1484)  time: 0.2459  data: 0.0812  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0658 (0.9280)  acc1: 79.2000 (83.9122)  acc5: 94.8000 (96.6829)  time: 0.2100  data: 0.0454  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0658 (0.9461)  acc1: 79.2000 (83.2320)  acc5: 95.6000 (96.6400)  time: 0.1774  data: 0.0137  max mem: 27255
Test: Total time: 0:00:13 (0.2651 s / it)
* Acc@1 83.576 Acc@5 96.672 loss 0.940
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.61%
Epoch: [279]  [   0/2502]  eta: 1:28:46  lr: 0.000056  min_lr: 0.000056  loss: 2.0518 (2.0518)  weight_decay: 0.0500 (0.0500)  time: 2.1289  data: 1.4657  max mem: 27255
Epoch: [279]  [ 200/2502]  eta: 0:13:08  lr: 0.000056  min_lr: 0.000056  loss: 2.6933 (2.7188)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6743 (1.7312)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [279]  [ 400/2502]  eta: 0:11:52  lr: 0.000055  min_lr: 0.000055  loss: 2.8589 (2.7306)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6211 (1.7131)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [279]  [ 600/2502]  eta: 0:10:41  lr: 0.000055  min_lr: 0.000055  loss: 2.8994 (2.7241)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7422 (1.7397)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [279]  [ 800/2502]  eta: 0:09:32  lr: 0.000055  min_lr: 0.000055  loss: 2.9921 (2.7439)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7027 (1.7579)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [279]  [1000/2502]  eta: 0:08:25  lr: 0.000054  min_lr: 0.000054  loss: 2.9362 (2.7455)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7771 (1.7576)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [279]  [1200/2502]  eta: 0:07:17  lr: 0.000054  min_lr: 0.000054  loss: 2.7332 (2.7449)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6331 (1.7553)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [279]  [1400/2502]  eta: 0:06:10  lr: 0.000053  min_lr: 0.000053  loss: 2.8845 (2.7506)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5965 (1.7564)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [279]  [1600/2502]  eta: 0:05:02  lr: 0.000053  min_lr: 0.000053  loss: 2.8742 (2.7510)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6357 (1.7533)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [279]  [1800/2502]  eta: 0:03:55  lr: 0.000053  min_lr: 0.000053  loss: 2.9377 (2.7501)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7095 (1.7445)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [279]  [2000/2502]  eta: 0:02:48  lr: 0.000052  min_lr: 0.000052  loss: 2.9979 (2.7524)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5483 (1.7400)  time: 0.3363  data: 0.0004  max mem: 27255
Epoch: [279]  [2200/2502]  eta: 0:01:41  lr: 0.000052  min_lr: 0.000052  loss: 2.8811 (2.7521)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5212 (1.7386)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [279]  [2400/2502]  eta: 0:00:34  lr: 0.000051  min_lr: 0.000051  loss: 2.9907 (2.7531)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6492 (1.7388)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [279]  [2501/2502]  eta: 0:00:00  lr: 0.000051  min_lr: 0.000051  loss: 2.9425 (2.7550)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6693 (1.7386)  time: 0.3005  data: 0.0007  max mem: 27255
Epoch: [279] Total time: 0:13:59 (0.3354 s / it)
Averaged stats: lr: 0.000051  min_lr: 0.000051  loss: 2.9425 (2.7563)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6693 (1.7386)
Test:  [ 0/50]  eta: 0:02:08  loss: 0.5495 (0.5495)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.5773  data: 2.3766  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7931 (0.7905)  acc1: 90.0000 (88.9091)  acc5: 98.8000 (98.1818)  time: 0.4432  data: 0.2760  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8030 (0.8184)  acc1: 85.6000 (87.6000)  acc5: 98.0000 (98.0191)  time: 0.2416  data: 0.0777  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9481 (0.9044)  acc1: 83.6000 (85.6000)  acc5: 96.8000 (97.0194)  time: 0.2290  data: 0.0645  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0928 (0.9522)  acc1: 80.4000 (84.1073)  acc5: 94.8000 (96.6829)  time: 0.2052  data: 0.0375  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0928 (0.9714)  acc1: 79.6000 (83.3840)  acc5: 96.0000 (96.6640)  time: 0.1978  data: 0.0303  max mem: 27255
Test: Total time: 0:00:13 (0.2620 s / it)
* Acc@1 83.618 Acc@5 96.680 loss 0.966
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.62%
Epoch: [280]  [   0/2502]  eta: 1:16:14  lr: 0.000051  min_lr: 0.000051  loss: 2.3530 (2.3530)  weight_decay: 0.0500 (0.0500)  time: 1.8283  data: 1.4746  max mem: 27255
Epoch: [280]  [ 200/2502]  eta: 0:13:08  lr: 0.000051  min_lr: 0.000051  loss: 2.7565 (2.7492)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6219 (1.7646)  time: 0.3421  data: 0.0005  max mem: 27255
Epoch: [280]  [ 400/2502]  eta: 0:11:50  lr: 0.000050  min_lr: 0.000050  loss: 2.9149 (2.7414)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6838 (1.7952)  time: 0.3329  data: 0.0003  max mem: 27255
Epoch: [280]  [ 600/2502]  eta: 0:10:40  lr: 0.000050  min_lr: 0.000050  loss: 2.8601 (2.7228)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6048 (1.7811)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [280]  [ 800/2502]  eta: 0:09:31  lr: 0.000050  min_lr: 0.000050  loss: 2.7914 (2.7219)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8036 (1.7698)  time: 0.3415  data: 0.0004  max mem: 27255
Epoch: [280]  [1000/2502]  eta: 0:08:24  lr: 0.000049  min_lr: 0.000049  loss: 2.7262 (2.7292)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6920 (1.7686)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [280]  [1200/2502]  eta: 0:07:16  lr: 0.000049  min_lr: 0.000049  loss: 2.6223 (2.7276)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7682 (1.7648)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [280]  [1400/2502]  eta: 0:06:09  lr: 0.000048  min_lr: 0.000048  loss: 2.6212 (2.7310)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6553 (1.7656)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [280]  [1600/2502]  eta: 0:05:02  lr: 0.000048  min_lr: 0.000048  loss: 2.8466 (2.7305)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7529 (1.7669)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [280]  [1800/2502]  eta: 0:03:55  lr: 0.000048  min_lr: 0.000048  loss: 2.9233 (2.7344)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6732 (1.7651)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [280]  [2000/2502]  eta: 0:02:48  lr: 0.000047  min_lr: 0.000047  loss: 2.7906 (2.7299)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7120 (1.7744)  time: 0.3341  data: 0.0003  max mem: 27255
Epoch: [280]  [2200/2502]  eta: 0:01:41  lr: 0.000047  min_lr: 0.000047  loss: 2.7762 (2.7321)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6711 (1.7745)  time: 0.3355  data: 0.0004  max mem: 27255
Epoch: [280]  [2400/2502]  eta: 0:00:34  lr: 0.000046  min_lr: 0.000046  loss: 2.9287 (2.7327)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6699 (1.7697)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [280]  [2501/2502]  eta: 0:00:00  lr: 0.000046  min_lr: 0.000046  loss: 2.8223 (2.7319)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7357 (1.7665)  time: 0.3001  data: 0.0010  max mem: 27255
Epoch: [280] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.000046  min_lr: 0.000046  loss: 2.8223 (2.7471)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7357 (1.7665)
Test:  [ 0/50]  eta: 0:02:41  loss: 0.5335 (0.5335)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.2309  data: 3.0256  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7848 (0.7828)  acc1: 90.0000 (88.7636)  acc5: 98.8000 (98.1091)  time: 0.4918  data: 0.3239  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8044 (0.8089)  acc1: 84.8000 (87.3905)  acc5: 98.0000 (98.0191)  time: 0.2225  data: 0.0584  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9391 (0.8955)  acc1: 83.2000 (85.4581)  acc5: 96.8000 (97.0323)  time: 0.2178  data: 0.0535  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0876 (0.9434)  acc1: 80.4000 (84.0000)  acc5: 95.2000 (96.6342)  time: 0.1902  data: 0.0220  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0876 (0.9637)  acc1: 79.2000 (83.2800)  acc5: 95.6000 (96.5840)  time: 0.1912  data: 0.0219  max mem: 27255
Test: Total time: 0:00:13 (0.2617 s / it)
* Acc@1 83.610 Acc@5 96.670 loss 0.957
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.62%
Epoch: [281]  [   0/2502]  eta: 1:32:43  lr: 0.000046  min_lr: 0.000046  loss: 2.7916 (2.7916)  weight_decay: 0.0500 (0.0500)  time: 2.2236  data: 1.8785  max mem: 27255
Epoch: [281]  [ 200/2502]  eta: 0:13:10  lr: 0.000046  min_lr: 0.000046  loss: 2.7784 (2.7800)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6562 (1.7710)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [281]  [ 400/2502]  eta: 0:11:53  lr: 0.000046  min_lr: 0.000046  loss: 2.6353 (2.7638)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5403 (1.7309)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [281]  [ 600/2502]  eta: 0:10:43  lr: 0.000045  min_lr: 0.000045  loss: 2.9061 (2.7461)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7833 (1.7274)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [281]  [ 800/2502]  eta: 0:09:34  lr: 0.000045  min_lr: 0.000045  loss: 2.8940 (2.7507)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7107 (1.7372)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [281]  [1000/2502]  eta: 0:08:25  lr: 0.000044  min_lr: 0.000044  loss: 2.6170 (2.7469)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6986 (1.7282)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [281]  [1200/2502]  eta: 0:07:17  lr: 0.000044  min_lr: 0.000044  loss: 2.8404 (2.7511)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7398 (1.7394)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [281]  [1400/2502]  eta: 0:06:10  lr: 0.000044  min_lr: 0.000044  loss: 2.7805 (2.7527)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7475 (1.7440)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [281]  [1600/2502]  eta: 0:05:02  lr: 0.000043  min_lr: 0.000043  loss: 2.7728 (2.7529)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7872 (inf)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [281]  [1800/2502]  eta: 0:03:55  lr: 0.000043  min_lr: 0.000043  loss: 2.9622 (2.7569)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7537 (inf)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [281]  [2000/2502]  eta: 0:02:48  lr: 0.000043  min_lr: 0.000043  loss: 2.8489 (2.7581)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7898 (inf)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [281]  [2200/2502]  eta: 0:01:41  lr: 0.000042  min_lr: 0.000042  loss: 2.8916 (2.7595)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6386 (inf)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [281]  [2400/2502]  eta: 0:00:34  lr: 0.000042  min_lr: 0.000042  loss: 2.7134 (2.7566)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5631 (inf)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [281]  [2501/2502]  eta: 0:00:00  lr: 0.000042  min_lr: 0.000042  loss: 2.8687 (2.7605)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6477 (inf)  time: 0.3007  data: 0.0007  max mem: 27255
Epoch: [281] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.000042  min_lr: 0.000042  loss: 2.8687 (2.7454)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6477 (inf)
Test:  [ 0/50]  eta: 0:02:17  loss: 0.5210 (0.5210)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 2.7445  data: 2.5452  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8059 (0.7874)  acc1: 88.8000 (88.3636)  acc5: 98.8000 (98.0364)  time: 0.4243  data: 0.2572  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8066 (0.8141)  acc1: 85.2000 (87.2000)  acc5: 98.0000 (98.0191)  time: 0.1975  data: 0.0337  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9769 (0.9050)  acc1: 83.2000 (85.3290)  acc5: 96.8000 (97.0194)  time: 0.2195  data: 0.0556  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1110 (0.9539)  acc1: 80.4000 (84.0195)  acc5: 94.8000 (96.6439)  time: 0.2106  data: 0.0469  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1110 (0.9749)  acc1: 79.6000 (83.3120)  acc5: 95.6000 (96.5760)  time: 0.2017  data: 0.0381  max mem: 27255
Test: Total time: 0:00:12 (0.2496 s / it)
* Acc@1 83.630 Acc@5 96.618 loss 0.967
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.63%
Epoch: [282]  [   0/2502]  eta: 1:11:02  lr: 0.000042  min_lr: 0.000042  loss: 3.2820 (3.2820)  weight_decay: 0.0500 (0.0500)  time: 1.7038  data: 1.3628  max mem: 27255
Epoch: [282]  [ 200/2502]  eta: 0:13:06  lr: 0.000041  min_lr: 0.000041  loss: 2.8328 (2.7443)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7367 (1.7314)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [282]  [ 400/2502]  eta: 0:11:50  lr: 0.000041  min_lr: 0.000041  loss: 2.9271 (2.7496)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6617 (1.7202)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [282]  [ 600/2502]  eta: 0:10:41  lr: 0.000041  min_lr: 0.000041  loss: 2.9560 (2.7390)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7952 (1.7321)  time: 0.3364  data: 0.0004  max mem: 27255
Epoch: [282]  [ 800/2502]  eta: 0:09:32  lr: 0.000040  min_lr: 0.000040  loss: 2.8046 (2.7438)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7077 (1.7431)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [282]  [1000/2502]  eta: 0:08:24  lr: 0.000040  min_lr: 0.000040  loss: 2.8413 (2.7436)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7032 (1.7363)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [282]  [1200/2502]  eta: 0:07:16  lr: 0.000040  min_lr: 0.000040  loss: 2.9506 (2.7429)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6700 (1.7366)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [282]  [1400/2502]  eta: 0:06:09  lr: 0.000039  min_lr: 0.000039  loss: 2.8243 (2.7492)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7804 (1.7437)  time: 0.3329  data: 0.0005  max mem: 27255
Epoch: [282]  [1600/2502]  eta: 0:05:02  lr: 0.000039  min_lr: 0.000039  loss: 2.8997 (2.7556)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6218 (1.7376)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [282]  [1800/2502]  eta: 0:03:55  lr: 0.000038  min_lr: 0.000038  loss: 2.7704 (2.7573)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7847 (1.7385)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [282]  [2000/2502]  eta: 0:02:48  lr: 0.000038  min_lr: 0.000038  loss: 2.7856 (2.7576)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7363 (1.7413)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [282]  [2200/2502]  eta: 0:01:41  lr: 0.000038  min_lr: 0.000038  loss: 2.8123 (2.7566)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7513 (1.7452)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [282]  [2400/2502]  eta: 0:00:34  lr: 0.000037  min_lr: 0.000037  loss: 2.8250 (2.7548)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7105 (1.7464)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [282]  [2501/2502]  eta: 0:00:00  lr: 0.000037  min_lr: 0.000037  loss: 2.7596 (2.7556)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6637 (1.7439)  time: 0.3001  data: 0.0007  max mem: 27255
Epoch: [282] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.000037  min_lr: 0.000037  loss: 2.7596 (2.7501)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6637 (1.7439)
Test:  [ 0/50]  eta: 0:02:17  loss: 0.4889 (0.4889)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 2.7418  data: 2.5373  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7525 (0.7545)  acc1: 90.8000 (88.6546)  acc5: 98.8000 (98.1455)  time: 0.4793  data: 0.3119  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7788 (0.7826)  acc1: 85.6000 (87.2952)  acc5: 98.0000 (98.0952)  time: 0.2234  data: 0.0597  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9124 (0.8685)  acc1: 82.4000 (85.3936)  acc5: 97.2000 (97.1226)  time: 0.1858  data: 0.0221  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0705 (0.9141)  acc1: 80.0000 (84.0390)  acc5: 94.8000 (96.7317)  time: 0.1798  data: 0.0162  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0705 (0.9360)  acc1: 80.0000 (83.3440)  acc5: 95.6000 (96.6720)  time: 0.1795  data: 0.0160  max mem: 27255
Test: Total time: 0:00:12 (0.2501 s / it)
* Acc@1 83.654 Acc@5 96.692 loss 0.931
Accuracy of the model on the 50000 test images: 83.7%
Max accuracy: 83.65%
Epoch: [283]  [   0/2502]  eta: 1:19:04  lr: 0.000037  min_lr: 0.000037  loss: 2.7531 (2.7531)  weight_decay: 0.0500 (0.0500)  time: 1.8962  data: 1.5336  max mem: 27255
Epoch: [283]  [ 200/2502]  eta: 0:13:09  lr: 0.000037  min_lr: 0.000037  loss: 2.9275 (2.7684)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6472 (1.7341)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [283]  [ 400/2502]  eta: 0:11:52  lr: 0.000037  min_lr: 0.000037  loss: 2.8792 (2.7566)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5982 (1.7557)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [283]  [ 600/2502]  eta: 0:10:41  lr: 0.000036  min_lr: 0.000036  loss: 2.7494 (2.7733)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7687 (1.7665)  time: 0.3353  data: 0.0005  max mem: 27255
Epoch: [283]  [ 800/2502]  eta: 0:09:32  lr: 0.000036  min_lr: 0.000036  loss: 2.9369 (2.7628)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6568 (1.7590)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [283]  [1000/2502]  eta: 0:08:24  lr: 0.000036  min_lr: 0.000036  loss: 2.9122 (2.7666)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6456 (1.7530)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [283]  [1200/2502]  eta: 0:07:17  lr: 0.000035  min_lr: 0.000035  loss: 2.7194 (2.7571)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7896 (1.7516)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [283]  [1400/2502]  eta: 0:06:09  lr: 0.000035  min_lr: 0.000035  loss: 2.9374 (2.7546)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7693 (1.7494)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [283]  [1600/2502]  eta: 0:05:02  lr: 0.000035  min_lr: 0.000035  loss: 2.7779 (2.7628)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7309 (1.7449)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [283]  [1800/2502]  eta: 0:03:55  lr: 0.000034  min_lr: 0.000034  loss: 2.8010 (2.7576)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8710 (1.7443)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [283]  [2000/2502]  eta: 0:02:48  lr: 0.000034  min_lr: 0.000034  loss: 2.8502 (2.7546)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7100 (1.7503)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [283]  [2200/2502]  eta: 0:01:41  lr: 0.000034  min_lr: 0.000034  loss: 2.8221 (2.7500)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7517 (1.7621)  time: 0.3345  data: 0.0004  max mem: 27255
Epoch: [283]  [2400/2502]  eta: 0:00:34  lr: 0.000033  min_lr: 0.000033  loss: 2.9713 (2.7488)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.9365 (1.7641)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [283]  [2501/2502]  eta: 0:00:00  lr: 0.000033  min_lr: 0.000033  loss: 2.5890 (2.7495)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8386 (1.7699)  time: 0.3069  data: 0.0008  max mem: 27255
Epoch: [283] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.000033  min_lr: 0.000033  loss: 2.5890 (2.7452)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8386 (1.7699)
Test:  [ 0/50]  eta: 0:02:23  loss: 0.4852 (0.4852)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.8634  data: 2.6619  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7221 (0.7345)  acc1: 90.8000 (88.7636)  acc5: 98.4000 (98.0727)  time: 0.4319  data: 0.2644  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7417 (0.7632)  acc1: 85.2000 (87.5048)  acc5: 98.0000 (98.0571)  time: 0.1991  data: 0.0351  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.8932 (0.8482)  acc1: 83.2000 (85.5871)  acc5: 96.8000 (97.1097)  time: 0.2081  data: 0.0442  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0381 (0.8962)  acc1: 79.2000 (84.1268)  acc5: 94.8000 (96.6537)  time: 0.2023  data: 0.0384  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0381 (0.9159)  acc1: 79.2000 (83.4800)  acc5: 95.2000 (96.6000)  time: 0.1828  data: 0.0191  max mem: 27255
Test: Total time: 0:00:12 (0.2494 s / it)
* Acc@1 83.692 Acc@5 96.704 loss 0.909
Accuracy of the model on the 50000 test images: 83.7%
Max accuracy: 83.69%
Epoch: [284]  [   0/2502]  eta: 1:15:09  lr: 0.000033  min_lr: 0.000033  loss: 2.5690 (2.5690)  weight_decay: 0.0500 (0.0500)  time: 1.8024  data: 1.4450  max mem: 27255
Epoch: [284]  [ 200/2502]  eta: 0:13:10  lr: 0.000033  min_lr: 0.000033  loss: 2.7864 (2.7479)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6891 (1.7800)  time: 0.3425  data: 0.0005  max mem: 27255
Epoch: [284]  [ 400/2502]  eta: 0:11:52  lr: 0.000032  min_lr: 0.000032  loss: 2.8376 (2.7665)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7023 (1.7613)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [284]  [ 600/2502]  eta: 0:10:41  lr: 0.000032  min_lr: 0.000032  loss: 2.8783 (2.7481)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6499 (1.7425)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [284]  [ 800/2502]  eta: 0:09:32  lr: 0.000032  min_lr: 0.000032  loss: 3.0137 (2.7522)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7354 (1.7698)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [284]  [1000/2502]  eta: 0:08:24  lr: 0.000032  min_lr: 0.000032  loss: 2.7321 (2.7420)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6917 (1.7510)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [284]  [1200/2502]  eta: 0:07:17  lr: 0.000031  min_lr: 0.000031  loss: 2.5869 (2.7376)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6247 (1.7372)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [284]  [1400/2502]  eta: 0:06:09  lr: 0.000031  min_lr: 0.000031  loss: 2.9368 (2.7464)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5832 (1.7336)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [284]  [1600/2502]  eta: 0:05:02  lr: 0.000031  min_lr: 0.000031  loss: 2.7148 (2.7405)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7213 (1.7435)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [284]  [1800/2502]  eta: 0:03:55  lr: 0.000030  min_lr: 0.000030  loss: 2.9629 (2.7405)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8540 (1.7402)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [284]  [2000/2502]  eta: 0:02:48  lr: 0.000030  min_lr: 0.000030  loss: 2.8758 (2.7390)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7659 (1.7430)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [284]  [2200/2502]  eta: 0:01:41  lr: 0.000030  min_lr: 0.000030  loss: 2.9761 (2.7426)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8019 (1.7530)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [284]  [2400/2502]  eta: 0:00:34  lr: 0.000029  min_lr: 0.000029  loss: 2.5896 (2.7454)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7311 (1.7624)  time: 0.3327  data: 0.0004  max mem: 27255
Epoch: [284]  [2501/2502]  eta: 0:00:00  lr: 0.000029  min_lr: 0.000029  loss: 2.6269 (2.7462)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5936 (1.7592)  time: 0.2999  data: 0.0007  max mem: 27255
Epoch: [284] Total time: 0:13:57 (0.3347 s / it)
Averaged stats: lr: 0.000029  min_lr: 0.000029  loss: 2.6269 (2.7378)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5936 (1.7592)
Test:  [ 0/50]  eta: 0:02:23  loss: 0.5440 (0.5440)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.8640  data: 2.6745  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.8013 (0.8004)  acc1: 89.6000 (88.5455)  acc5: 98.8000 (98.0727)  time: 0.4261  data: 0.2598  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8104 (0.8262)  acc1: 85.2000 (87.3143)  acc5: 98.4000 (98.0952)  time: 0.1751  data: 0.0111  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9698 (0.9167)  acc1: 82.8000 (85.4323)  acc5: 96.4000 (97.0065)  time: 0.1956  data: 0.0316  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1213 (0.9669)  acc1: 80.0000 (83.8634)  acc5: 94.4000 (96.5951)  time: 0.2035  data: 0.0396  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1213 (0.9863)  acc1: 78.8000 (83.2160)  acc5: 95.2000 (96.5520)  time: 0.1920  data: 0.0282  max mem: 27255
Test: Total time: 0:00:12 (0.2400 s / it)
* Acc@1 83.584 Acc@5 96.654 loss 0.980
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.69%
Epoch: [285]  [   0/2502]  eta: 1:34:18  lr: 0.000029  min_lr: 0.000029  loss: 2.4630 (2.4630)  weight_decay: 0.0500 (0.0500)  time: 2.2617  data: 1.9044  max mem: 27255
Epoch: [285]  [ 200/2502]  eta: 0:13:10  lr: 0.000029  min_lr: 0.000029  loss: 2.7744 (2.7471)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6776 (1.7374)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [285]  [ 400/2502]  eta: 0:11:52  lr: 0.000029  min_lr: 0.000029  loss: 2.8372 (2.7288)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6534 (1.7227)  time: 0.3412  data: 0.0004  max mem: 27255
Epoch: [285]  [ 600/2502]  eta: 0:10:41  lr: 0.000028  min_lr: 0.000028  loss: 2.9075 (2.7450)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6640 (1.7162)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [285]  [ 800/2502]  eta: 0:09:33  lr: 0.000028  min_lr: 0.000028  loss: 2.9441 (2.7607)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6776 (1.7170)  time: 0.3369  data: 0.0004  max mem: 27255
Epoch: [285]  [1000/2502]  eta: 0:08:25  lr: 0.000028  min_lr: 0.000028  loss: 2.5605 (2.7532)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7160 (1.7139)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [285]  [1200/2502]  eta: 0:07:17  lr: 0.000027  min_lr: 0.000027  loss: 2.7794 (2.7532)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6827 (1.7089)  time: 0.3436  data: 0.0004  max mem: 27255
Epoch: [285]  [1400/2502]  eta: 0:06:10  lr: 0.000027  min_lr: 0.000027  loss: 2.8386 (2.7551)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7427 (1.7121)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [285]  [1600/2502]  eta: 0:05:03  lr: 0.000027  min_lr: 0.000027  loss: 3.0344 (2.7503)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5836 (1.7217)  time: 0.3340  data: 0.0005  max mem: 27255
Epoch: [285]  [1800/2502]  eta: 0:03:56  lr: 0.000027  min_lr: 0.000027  loss: 2.8133 (2.7455)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7091 (1.7303)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [285]  [2000/2502]  eta: 0:02:48  lr: 0.000026  min_lr: 0.000026  loss: 2.8193 (2.7432)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6382 (1.7274)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [285]  [2200/2502]  eta: 0:01:41  lr: 0.000026  min_lr: 0.000026  loss: 2.8502 (2.7423)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6421 (1.7243)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [285]  [2400/2502]  eta: 0:00:34  lr: 0.000026  min_lr: 0.000026  loss: 3.0167 (2.7430)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7808 (1.7240)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [285]  [2501/2502]  eta: 0:00:00  lr: 0.000026  min_lr: 0.000026  loss: 2.9250 (2.7416)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7375 (1.7249)  time: 0.3010  data: 0.0009  max mem: 27255
Epoch: [285] Total time: 0:14:00 (0.3357 s / it)
Averaged stats: lr: 0.000026  min_lr: 0.000026  loss: 2.9250 (2.7375)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7375 (1.7249)
Test:  [ 0/50]  eta: 0:02:29  loss: 0.5307 (0.5307)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 2.9893  data: 2.7947  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.7723 (0.7872)  acc1: 90.8000 (88.9818)  acc5: 98.8000 (98.1455)  time: 0.4229  data: 0.2562  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8008 (0.8134)  acc1: 85.2000 (87.5429)  acc5: 98.0000 (98.1143)  time: 0.2091  data: 0.0452  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9615 (0.9035)  acc1: 83.2000 (85.3677)  acc5: 96.8000 (97.2000)  time: 0.2569  data: 0.0923  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1067 (0.9532)  acc1: 79.6000 (83.9024)  acc5: 94.8000 (96.7512)  time: 0.2168  data: 0.0523  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1067 (0.9734)  acc1: 79.2000 (83.2080)  acc5: 96.0000 (96.7040)  time: 0.2160  data: 0.0521  max mem: 27255
Test: Total time: 0:00:13 (0.2614 s / it)
* Acc@1 83.638 Acc@5 96.688 loss 0.968
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.69%
Epoch: [286]  [   0/2502]  eta: 1:32:13  lr: 0.000026  min_lr: 0.000026  loss: 2.9799 (2.9799)  weight_decay: 0.0500 (0.0500)  time: 2.2116  data: 1.8531  max mem: 27255
Epoch: [286]  [ 200/2502]  eta: 0:13:13  lr: 0.000025  min_lr: 0.000025  loss: 2.8590 (2.6479)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6574 (1.7165)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [286]  [ 400/2502]  eta: 0:11:52  lr: 0.000025  min_lr: 0.000025  loss: 2.6546 (2.7143)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6815 (1.7529)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [286]  [ 600/2502]  eta: 0:10:41  lr: 0.000025  min_lr: 0.000025  loss: 2.7522 (2.7214)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7477 (1.7589)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [286]  [ 800/2502]  eta: 0:09:32  lr: 0.000025  min_lr: 0.000025  loss: 2.7950 (2.7099)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7453 (1.7541)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [286]  [1000/2502]  eta: 0:08:24  lr: 0.000024  min_lr: 0.000024  loss: 2.6213 (2.7193)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6802 (1.7438)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [286]  [1200/2502]  eta: 0:07:17  lr: 0.000024  min_lr: 0.000024  loss: 2.5885 (2.7180)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7483 (1.7527)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [286]  [1400/2502]  eta: 0:06:09  lr: 0.000024  min_lr: 0.000024  loss: 2.9581 (2.7210)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6880 (1.7583)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [286]  [1600/2502]  eta: 0:05:02  lr: 0.000023  min_lr: 0.000023  loss: 2.7728 (2.7249)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6760 (1.7610)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [286]  [1800/2502]  eta: 0:03:55  lr: 0.000023  min_lr: 0.000023  loss: 2.8095 (2.7264)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5701 (1.7523)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [286]  [2000/2502]  eta: 0:02:48  lr: 0.000023  min_lr: 0.000023  loss: 2.7560 (2.7278)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8094 (1.7625)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [286]  [2200/2502]  eta: 0:01:41  lr: 0.000023  min_lr: 0.000023  loss: 2.6329 (2.7328)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7210 (inf)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [286]  [2400/2502]  eta: 0:00:34  lr: 0.000022  min_lr: 0.000022  loss: 2.7467 (2.7347)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6543 (inf)  time: 0.3350  data: 0.0003  max mem: 27255
Epoch: [286]  [2501/2502]  eta: 0:00:00  lr: 0.000022  min_lr: 0.000022  loss: 2.7996 (2.7351)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8046 (inf)  time: 0.3011  data: 0.0009  max mem: 27255
Epoch: [286] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.000022  min_lr: 0.000022  loss: 2.7996 (2.7371)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8046 (inf)
Test:  [ 0/50]  eta: 0:02:36  loss: 0.5107 (0.5107)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 3.1386  data: 2.9463  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7786 (0.7736)  acc1: 90.0000 (88.5818)  acc5: 98.8000 (98.0727)  time: 0.4716  data: 0.3042  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7826 (0.8011)  acc1: 85.2000 (87.4667)  acc5: 98.0000 (98.0762)  time: 0.2091  data: 0.0447  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9341 (0.8882)  acc1: 82.8000 (85.5097)  acc5: 96.8000 (97.0581)  time: 0.2117  data: 0.0474  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0925 (0.9356)  acc1: 80.4000 (84.0683)  acc5: 94.8000 (96.6342)  time: 0.1937  data: 0.0266  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0925 (0.9546)  acc1: 79.2000 (83.3440)  acc5: 96.0000 (96.6000)  time: 0.1934  data: 0.0264  max mem: 27255
Test: Total time: 0:00:12 (0.2554 s / it)
* Acc@1 83.670 Acc@5 96.666 loss 0.949
Accuracy of the model on the 50000 test images: 83.7%
Max accuracy: 83.69%
Epoch: [287]  [   0/2502]  eta: 1:36:12  lr: 0.000022  min_lr: 0.000022  loss: 2.9070 (2.9070)  weight_decay: 0.0500 (0.0500)  time: 2.3071  data: 1.9705  max mem: 27255
Epoch: [287]  [ 200/2502]  eta: 0:13:11  lr: 0.000022  min_lr: 0.000022  loss: 2.5781 (2.7726)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6369 (1.7433)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [287]  [ 400/2502]  eta: 0:11:52  lr: 0.000022  min_lr: 0.000022  loss: 2.8596 (2.7491)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6339 (1.7848)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [287]  [ 600/2502]  eta: 0:10:42  lr: 0.000021  min_lr: 0.000021  loss: 2.7292 (2.7248)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7351 (1.7736)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [287]  [ 800/2502]  eta: 0:09:33  lr: 0.000021  min_lr: 0.000021  loss: 2.7261 (2.7314)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6930 (1.7613)  time: 0.3364  data: 0.0004  max mem: 27255
Epoch: [287]  [1000/2502]  eta: 0:08:25  lr: 0.000021  min_lr: 0.000021  loss: 2.9034 (2.7295)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6383 (1.7569)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [287]  [1200/2502]  eta: 0:07:18  lr: 0.000021  min_lr: 0.000021  loss: 2.7379 (2.7336)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7164 (1.7539)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [287]  [1400/2502]  eta: 0:06:10  lr: 0.000020  min_lr: 0.000020  loss: 2.6542 (2.7302)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7196 (1.7509)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [287]  [1600/2502]  eta: 0:05:03  lr: 0.000020  min_lr: 0.000020  loss: 2.5311 (2.7293)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7221 (1.7548)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [287]  [1800/2502]  eta: 0:03:55  lr: 0.000020  min_lr: 0.000020  loss: 2.8293 (2.7318)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6312 (1.7496)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [287]  [2000/2502]  eta: 0:02:48  lr: 0.000020  min_lr: 0.000020  loss: 2.7782 (2.7298)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6170 (1.7512)  time: 0.3352  data: 0.0004  max mem: 27255
Epoch: [287]  [2200/2502]  eta: 0:01:41  lr: 0.000019  min_lr: 0.000019  loss: 2.7268 (2.7285)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6770 (1.7493)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [287]  [2400/2502]  eta: 0:00:34  lr: 0.000019  min_lr: 0.000019  loss: 2.9954 (2.7313)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6813 (1.7448)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [287]  [2501/2502]  eta: 0:00:00  lr: 0.000019  min_lr: 0.000019  loss: 2.9297 (2.7325)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7271 (1.7487)  time: 0.3010  data: 0.0009  max mem: 27255
Epoch: [287] Total time: 0:13:59 (0.3356 s / it)
Averaged stats: lr: 0.000019  min_lr: 0.000019  loss: 2.9297 (2.7374)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7271 (1.7487)
Test:  [ 0/50]  eta: 0:02:41  loss: 0.5457 (0.5457)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.2295  data: 3.0413  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.8005 (0.7918)  acc1: 90.4000 (88.7636)  acc5: 98.8000 (98.0727)  time: 0.4790  data: 0.3129  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8036 (0.8197)  acc1: 85.2000 (87.3333)  acc5: 98.0000 (98.0762)  time: 0.2095  data: 0.0456  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9643 (0.9076)  acc1: 82.8000 (85.4065)  acc5: 96.8000 (97.1355)  time: 0.2282  data: 0.0643  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1168 (0.9546)  acc1: 79.6000 (84.0488)  acc5: 94.8000 (96.6829)  time: 0.2084  data: 0.0445  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1168 (0.9758)  acc1: 79.6000 (83.3840)  acc5: 95.6000 (96.6080)  time: 0.2081  data: 0.0444  max mem: 27255
Test: Total time: 0:00:13 (0.2634 s / it)
* Acc@1 83.658 Acc@5 96.690 loss 0.970
Accuracy of the model on the 50000 test images: 83.7%
Max accuracy: 83.69%
Epoch: [288]  [   0/2502]  eta: 1:24:56  lr: 0.000019  min_lr: 0.000019  loss: 3.0248 (3.0248)  weight_decay: 0.0500 (0.0500)  time: 2.0369  data: 1.3549  max mem: 27255
Epoch: [288]  [ 200/2502]  eta: 0:13:10  lr: 0.000019  min_lr: 0.000019  loss: 2.6564 (2.7348)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6243 (1.7619)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [288]  [ 400/2502]  eta: 0:11:54  lr: 0.000019  min_lr: 0.000019  loss: 2.9840 (2.7482)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7999 (1.7848)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [288]  [ 600/2502]  eta: 0:10:42  lr: 0.000018  min_lr: 0.000018  loss: 2.6884 (2.7370)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6168 (1.7737)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [288]  [ 800/2502]  eta: 0:09:33  lr: 0.000018  min_lr: 0.000018  loss: 2.7904 (2.7206)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6266 (1.7510)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [288]  [1000/2502]  eta: 0:08:26  lr: 0.000018  min_lr: 0.000018  loss: 2.9337 (2.7245)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8390 (1.7569)  time: 0.3516  data: 0.0007  max mem: 27255
Epoch: [288]  [1200/2502]  eta: 0:07:18  lr: 0.000018  min_lr: 0.000018  loss: 2.8665 (2.7202)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7186 (1.7728)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [288]  [1400/2502]  eta: 0:06:10  lr: 0.000017  min_lr: 0.000017  loss: 2.9024 (2.7204)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5712 (1.7704)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [288]  [1600/2502]  eta: 0:05:02  lr: 0.000017  min_lr: 0.000017  loss: 2.8591 (2.7229)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6557 (1.7751)  time: 0.3358  data: 0.0004  max mem: 27255
Epoch: [288]  [1800/2502]  eta: 0:03:55  lr: 0.000017  min_lr: 0.000017  loss: 2.8673 (2.7197)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6584 (1.7784)  time: 0.3335  data: 0.0003  max mem: 27255
Epoch: [288]  [2000/2502]  eta: 0:02:48  lr: 0.000017  min_lr: 0.000017  loss: 2.9153 (2.7240)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7165 (1.7729)  time: 0.3353  data: 0.0003  max mem: 27255
Epoch: [288]  [2200/2502]  eta: 0:01:41  lr: 0.000017  min_lr: 0.000017  loss: 2.4479 (2.7268)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7664 (1.7710)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [288]  [2400/2502]  eta: 0:00:34  lr: 0.000016  min_lr: 0.000016  loss: 2.8819 (2.7273)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6892 (1.7661)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [288]  [2501/2502]  eta: 0:00:00  lr: 0.000016  min_lr: 0.000016  loss: 2.9024 (2.7288)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6194 (1.7639)  time: 0.3003  data: 0.0006  max mem: 27255
Epoch: [288] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.000016  min_lr: 0.000016  loss: 2.9024 (2.7296)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6194 (1.7639)
Test:  [ 0/50]  eta: 0:03:05  loss: 0.5352 (0.5352)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.7123  data: 3.5194  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 0.7884 (0.7864)  acc1: 90.0000 (88.6909)  acc5: 98.4000 (98.1455)  time: 0.5184  data: 0.3516  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7958 (0.8131)  acc1: 84.8000 (87.3143)  acc5: 98.0000 (98.0571)  time: 0.1972  data: 0.0332  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9441 (0.9009)  acc1: 82.8000 (85.2903)  acc5: 96.4000 (97.0323)  time: 0.2065  data: 0.0427  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1009 (0.9473)  acc1: 80.0000 (83.9024)  acc5: 94.4000 (96.6342)  time: 0.2105  data: 0.0468  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1009 (0.9668)  acc1: 80.0000 (83.2160)  acc5: 95.6000 (96.5760)  time: 0.2083  data: 0.0447  max mem: 27255
Test: Total time: 0:00:13 (0.2683 s / it)
* Acc@1 83.614 Acc@5 96.652 loss 0.960
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.69%
Epoch: [289]  [   0/2502]  eta: 1:28:20  lr: 0.000016  min_lr: 0.000016  loss: 2.5694 (2.5694)  weight_decay: 0.0500 (0.0500)  time: 2.1184  data: 1.5352  max mem: 27255
Epoch: [289]  [ 200/2502]  eta: 0:13:09  lr: 0.000016  min_lr: 0.000016  loss: 2.9894 (2.7462)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6570 (1.7262)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [289]  [ 400/2502]  eta: 0:11:50  lr: 0.000016  min_lr: 0.000016  loss: 2.9485 (2.7525)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7771 (1.7494)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [289]  [ 600/2502]  eta: 0:10:41  lr: 0.000016  min_lr: 0.000016  loss: 2.7147 (2.7465)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6442 (1.7381)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [289]  [ 800/2502]  eta: 0:09:32  lr: 0.000015  min_lr: 0.000015  loss: 3.0791 (2.7452)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7976 (1.7309)  time: 0.3343  data: 0.0003  max mem: 27255
Epoch: [289]  [1000/2502]  eta: 0:08:24  lr: 0.000015  min_lr: 0.000015  loss: 2.8730 (2.7420)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6391 (1.7181)  time: 0.3333  data: 0.0003  max mem: 27255
Epoch: [289]  [1200/2502]  eta: 0:07:16  lr: 0.000015  min_lr: 0.000015  loss: 2.9324 (2.7434)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6101 (1.7180)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [289]  [1400/2502]  eta: 0:06:09  lr: 0.000015  min_lr: 0.000015  loss: 2.7515 (2.7384)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7091 (1.7279)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [289]  [1600/2502]  eta: 0:05:02  lr: 0.000014  min_lr: 0.000014  loss: 2.6760 (2.7335)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5916 (1.7224)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [289]  [1800/2502]  eta: 0:03:55  lr: 0.000014  min_lr: 0.000014  loss: 2.8892 (2.7342)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6881 (1.7236)  time: 0.3339  data: 0.0003  max mem: 27255
Epoch: [289]  [2000/2502]  eta: 0:02:48  lr: 0.000014  min_lr: 0.000014  loss: 2.8484 (2.7359)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7104 (1.7250)  time: 0.3343  data: 0.0003  max mem: 27255
Epoch: [289]  [2200/2502]  eta: 0:01:41  lr: 0.000014  min_lr: 0.000014  loss: 2.5985 (2.7368)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6947 (1.7277)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [289]  [2400/2502]  eta: 0:00:34  lr: 0.000014  min_lr: 0.000014  loss: 2.6343 (2.7356)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6996 (1.7303)  time: 0.3352  data: 0.0003  max mem: 27255
Epoch: [289]  [2501/2502]  eta: 0:00:00  lr: 0.000014  min_lr: 0.000014  loss: 2.8622 (2.7365)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6179 (1.7281)  time: 0.3007  data: 0.0007  max mem: 27255
Epoch: [289] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.000014  min_lr: 0.000014  loss: 2.8622 (2.7359)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6179 (1.7281)
Test:  [ 0/50]  eta: 0:02:21  loss: 0.5153 (0.5153)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.8346  data: 2.6429  max mem: 27255
Test:  [10/50]  eta: 0:00:17  loss: 0.7739 (0.7706)  acc1: 90.0000 (88.8364)  acc5: 98.8000 (98.1091)  time: 0.4427  data: 0.2762  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7852 (0.7957)  acc1: 84.4000 (87.4667)  acc5: 98.0000 (98.0762)  time: 0.2066  data: 0.0426  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9321 (0.8820)  acc1: 83.2000 (85.4839)  acc5: 96.4000 (97.0323)  time: 0.2347  data: 0.0707  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0818 (0.9296)  acc1: 80.0000 (84.0585)  acc5: 94.8000 (96.5854)  time: 0.2371  data: 0.0732  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0818 (0.9497)  acc1: 79.2000 (83.3600)  acc5: 95.2000 (96.5200)  time: 0.1891  data: 0.0255  max mem: 27255
Test: Total time: 0:00:13 (0.2657 s / it)
* Acc@1 83.656 Acc@5 96.644 loss 0.944
Accuracy of the model on the 50000 test images: 83.7%
Max accuracy: 83.69%
Epoch: [290]  [   0/2502]  eta: 1:29:12  lr: 0.000014  min_lr: 0.000014  loss: 3.1240 (3.1240)  weight_decay: 0.0500 (0.0500)  time: 2.1392  data: 1.7166  max mem: 27255
Epoch: [290]  [ 200/2502]  eta: 0:13:12  lr: 0.000013  min_lr: 0.000013  loss: 2.8149 (2.7210)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7827 (1.8146)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [290]  [ 400/2502]  eta: 0:11:53  lr: 0.000013  min_lr: 0.000013  loss: 2.6323 (2.6969)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6865 (1.7955)  time: 0.3349  data: 0.0005  max mem: 27255
Epoch: [290]  [ 600/2502]  eta: 0:10:42  lr: 0.000013  min_lr: 0.000013  loss: 2.7165 (2.7104)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7539 (1.8103)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [290]  [ 800/2502]  eta: 0:09:33  lr: 0.000013  min_lr: 0.000013  loss: 2.7965 (2.7195)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5847 (1.7944)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [290]  [1000/2502]  eta: 0:08:25  lr: 0.000013  min_lr: 0.000013  loss: 2.8027 (2.7041)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6802 (1.7770)  time: 0.3432  data: 0.0004  max mem: 27255
Epoch: [290]  [1200/2502]  eta: 0:07:17  lr: 0.000012  min_lr: 0.000012  loss: 2.9869 (2.7066)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7754 (1.7686)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [290]  [1400/2502]  eta: 0:06:10  lr: 0.000012  min_lr: 0.000012  loss: 2.8253 (2.7120)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6992 (1.7777)  time: 0.3331  data: 0.0003  max mem: 27255
Epoch: [290]  [1600/2502]  eta: 0:05:02  lr: 0.000012  min_lr: 0.000012  loss: 2.8983 (2.7163)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7141 (1.7743)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [290]  [1800/2502]  eta: 0:03:55  lr: 0.000012  min_lr: 0.000012  loss: 2.8168 (2.7140)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6974 (1.7684)  time: 0.3337  data: 0.0003  max mem: 27255
Epoch: [290]  [2000/2502]  eta: 0:02:48  lr: 0.000012  min_lr: 0.000012  loss: 2.9382 (2.7198)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7631 (1.7726)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [290]  [2200/2502]  eta: 0:01:41  lr: 0.000011  min_lr: 0.000011  loss: 2.9460 (2.7204)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5814 (1.7708)  time: 0.3335  data: 0.0003  max mem: 27255
Epoch: [290]  [2400/2502]  eta: 0:00:34  lr: 0.000011  min_lr: 0.000011  loss: 2.5660 (2.7177)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6736 (1.7709)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [290]  [2501/2502]  eta: 0:00:00  lr: 0.000011  min_lr: 0.000011  loss: 2.6574 (2.7183)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6415 (1.7713)  time: 0.3005  data: 0.0007  max mem: 27255
Epoch: [290] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000011  min_lr: 0.000011  loss: 2.6574 (2.7319)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6415 (1.7713)
Test:  [ 0/50]  eta: 0:02:55  loss: 0.4744 (0.4744)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.5081  data: 3.3087  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7382 (0.7306)  acc1: 90.0000 (88.9091)  acc5: 98.8000 (98.1091)  time: 0.4691  data: 0.3013  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7391 (0.7585)  acc1: 85.6000 (87.5810)  acc5: 98.0000 (98.0000)  time: 0.1711  data: 0.0063  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.8880 (0.8449)  acc1: 83.2000 (85.5355)  acc5: 96.4000 (97.0710)  time: 0.2155  data: 0.0464  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0394 (0.8917)  acc1: 79.6000 (84.1463)  acc5: 95.2000 (96.6537)  time: 0.2190  data: 0.0506  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0394 (0.9110)  acc1: 79.2000 (83.4560)  acc5: 95.6000 (96.5920)  time: 0.2142  data: 0.0505  max mem: 27255
Test: Total time: 0:00:12 (0.2578 s / it)
* Acc@1 83.662 Acc@5 96.658 loss 0.905
Accuracy of the model on the 50000 test images: 83.7%
Max accuracy: 83.69%
Epoch: [291]  [   0/2502]  eta: 1:16:23  lr: 0.000011  min_lr: 0.000011  loss: 2.7093 (2.7093)  weight_decay: 0.0500 (0.0500)  time: 1.8318  data: 1.4287  max mem: 27255
Epoch: [291]  [ 200/2502]  eta: 0:13:07  lr: 0.000011  min_lr: 0.000011  loss: 2.8630 (2.7436)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7255 (1.7754)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [291]  [ 400/2502]  eta: 0:11:51  lr: 0.000011  min_lr: 0.000011  loss: 2.7221 (2.7425)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6928 (1.7455)  time: 0.3358  data: 0.0005  max mem: 27255
Epoch: [291]  [ 600/2502]  eta: 0:10:41  lr: 0.000011  min_lr: 0.000011  loss: 2.8407 (2.7125)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6216 (1.7291)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [291]  [ 800/2502]  eta: 0:09:33  lr: 0.000010  min_lr: 0.000010  loss: 2.8374 (2.7247)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8078 (1.7435)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [291]  [1000/2502]  eta: 0:08:25  lr: 0.000010  min_lr: 0.000010  loss: 2.7884 (2.7280)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8627 (1.7327)  time: 0.3335  data: 0.0005  max mem: 27255
Epoch: [291]  [1200/2502]  eta: 0:07:17  lr: 0.000010  min_lr: 0.000010  loss: 2.7376 (2.7235)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5500 (1.7296)  time: 0.3412  data: 0.0005  max mem: 27255
Epoch: [291]  [1400/2502]  eta: 0:06:10  lr: 0.000010  min_lr: 0.000010  loss: 2.6504 (2.7280)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5784 (1.7310)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [291]  [1600/2502]  eta: 0:05:02  lr: 0.000010  min_lr: 0.000010  loss: 2.8869 (2.7346)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7141 (1.7454)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [291]  [1800/2502]  eta: 0:03:55  lr: 0.000010  min_lr: 0.000010  loss: 2.9002 (2.7401)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7549 (1.7512)  time: 0.3334  data: 0.0005  max mem: 27255
Epoch: [291]  [2000/2502]  eta: 0:02:48  lr: 0.000009  min_lr: 0.000009  loss: 2.9271 (2.7342)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6854 (1.7607)  time: 0.3343  data: 0.0005  max mem: 27255
Epoch: [291]  [2200/2502]  eta: 0:01:41  lr: 0.000009  min_lr: 0.000009  loss: 2.8209 (2.7343)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6459 (1.7582)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [291]  [2400/2502]  eta: 0:00:34  lr: 0.000009  min_lr: 0.000009  loss: 2.7721 (2.7349)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6340 (inf)  time: 0.3374  data: 0.0005  max mem: 27255
Epoch: [291]  [2501/2502]  eta: 0:00:00  lr: 0.000009  min_lr: 0.000009  loss: 2.9866 (2.7365)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7404 (inf)  time: 0.3020  data: 0.0009  max mem: 27255
Epoch: [291] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.000009  min_lr: 0.000009  loss: 2.9866 (2.7221)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7404 (inf)
Test:  [ 0/50]  eta: 0:02:50  loss: 0.6063 (0.6063)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 3.4001  data: 3.2050  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.8664 (0.8570)  acc1: 90.0000 (88.9091)  acc5: 98.4000 (98.0727)  time: 0.4951  data: 0.3284  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8751 (0.8869)  acc1: 85.6000 (87.4476)  acc5: 98.0000 (98.0571)  time: 0.1884  data: 0.0244  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 1.0251 (0.9734)  acc1: 82.8000 (85.4839)  acc5: 97.2000 (97.1226)  time: 0.1870  data: 0.0230  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1571 (1.0213)  acc1: 79.2000 (83.9512)  acc5: 94.8000 (96.6342)  time: 0.1972  data: 0.0333  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1571 (1.0406)  acc1: 78.8000 (83.2400)  acc5: 95.2000 (96.5760)  time: 0.1969  data: 0.0331  max mem: 27255
Test: Total time: 0:00:12 (0.2534 s / it)
* Acc@1 83.630 Acc@5 96.640 loss 1.035
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.69%
Epoch: [292]  [   0/2502]  eta: 1:32:16  lr: 0.000009  min_lr: 0.000009  loss: 2.9366 (2.9366)  weight_decay: 0.0500 (0.0500)  time: 2.2129  data: 1.8768  max mem: 27255
Epoch: [292]  [ 200/2502]  eta: 0:13:10  lr: 0.000009  min_lr: 0.000009  loss: 2.8222 (2.7171)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6406 (1.7269)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [292]  [ 400/2502]  eta: 0:11:51  lr: 0.000009  min_lr: 0.000009  loss: 2.9413 (2.7180)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7042 (1.7306)  time: 0.3345  data: 0.0005  max mem: 27255
Epoch: [292]  [ 600/2502]  eta: 0:10:41  lr: 0.000009  min_lr: 0.000009  loss: 2.8521 (2.7175)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8019 (1.7618)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [292]  [ 800/2502]  eta: 0:09:32  lr: 0.000008  min_lr: 0.000008  loss: 2.8302 (2.7278)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6903 (1.7397)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [292]  [1000/2502]  eta: 0:08:24  lr: 0.000008  min_lr: 0.000008  loss: 2.9772 (2.7390)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7684 (1.7383)  time: 0.3423  data: 0.0004  max mem: 27255
Epoch: [292]  [1200/2502]  eta: 0:07:17  lr: 0.000008  min_lr: 0.000008  loss: 3.0322 (2.7412)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5615 (1.7321)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [292]  [1400/2502]  eta: 0:06:09  lr: 0.000008  min_lr: 0.000008  loss: 2.6997 (2.7320)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6132 (1.7382)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [292]  [1600/2502]  eta: 0:05:02  lr: 0.000008  min_lr: 0.000008  loss: 2.8537 (2.7328)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6292 (1.7386)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [292]  [1800/2502]  eta: 0:03:55  lr: 0.000008  min_lr: 0.000008  loss: 2.8635 (2.7337)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5448 (1.7395)  time: 0.3372  data: 0.0004  max mem: 27255
Epoch: [292]  [2000/2502]  eta: 0:02:48  lr: 0.000008  min_lr: 0.000008  loss: 2.9722 (2.7310)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6538 (1.7390)  time: 0.3328  data: 0.0004  max mem: 27255
Epoch: [292]  [2200/2502]  eta: 0:01:41  lr: 0.000007  min_lr: 0.000007  loss: 2.8648 (2.7294)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6325 (1.7446)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [292]  [2400/2502]  eta: 0:00:34  lr: 0.000007  min_lr: 0.000007  loss: 2.9024 (2.7302)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6123 (1.7431)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [292]  [2501/2502]  eta: 0:00:00  lr: 0.000007  min_lr: 0.000007  loss: 2.7334 (2.7295)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6751 (1.7432)  time: 0.2997  data: 0.0008  max mem: 27255
Epoch: [292] Total time: 0:13:57 (0.3349 s / it)
Averaged stats: lr: 0.000007  min_lr: 0.000007  loss: 2.7334 (2.7360)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6751 (1.7432)
Test:  [ 0/50]  eta: 0:02:22  loss: 0.4892 (0.4892)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 2.8476  data: 2.6460  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.7479 (0.7447)  acc1: 89.2000 (88.8727)  acc5: 98.8000 (98.0727)  time: 0.4108  data: 0.2429  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7578 (0.7722)  acc1: 85.6000 (87.5048)  acc5: 98.0000 (97.9619)  time: 0.1967  data: 0.0325  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9120 (0.8604)  acc1: 82.8000 (85.4968)  acc5: 96.0000 (96.9290)  time: 0.2073  data: 0.0433  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0681 (0.9073)  acc1: 79.6000 (84.0976)  acc5: 94.8000 (96.5366)  time: 0.2305  data: 0.0667  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0681 (0.9276)  acc1: 79.2000 (83.3360)  acc5: 95.2000 (96.4880)  time: 0.2264  data: 0.0627  max mem: 27255
Test: Total time: 0:00:13 (0.2623 s / it)
* Acc@1 83.610 Acc@5 96.632 loss 0.921
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.69%
Epoch: [293]  [   0/2502]  eta: 1:20:41  lr: 0.000007  min_lr: 0.000007  loss: 3.0765 (3.0765)  weight_decay: 0.0500 (0.0500)  time: 1.9351  data: 1.4247  max mem: 27255
Epoch: [293]  [ 200/2502]  eta: 0:13:08  lr: 0.000007  min_lr: 0.000007  loss: 2.7825 (2.7160)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6889 (1.7180)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [293]  [ 400/2502]  eta: 0:11:51  lr: 0.000007  min_lr: 0.000007  loss: 2.8581 (2.7100)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6746 (1.7186)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [293]  [ 600/2502]  eta: 0:10:41  lr: 0.000007  min_lr: 0.000007  loss: 2.8328 (2.7207)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6974 (1.7130)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [293]  [ 800/2502]  eta: 0:09:32  lr: 0.000007  min_lr: 0.000007  loss: 2.6218 (2.7245)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6185 (1.7229)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [293]  [1000/2502]  eta: 0:08:24  lr: 0.000006  min_lr: 0.000006  loss: 2.7497 (2.7235)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6234 (1.7199)  time: 0.3338  data: 0.0004  max mem: 27255
Epoch: [293]  [1200/2502]  eta: 0:07:17  lr: 0.000006  min_lr: 0.000006  loss: 2.7343 (2.7321)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6198 (1.7174)  time: 0.3351  data: 0.0005  max mem: 27255
Epoch: [293]  [1400/2502]  eta: 0:06:10  lr: 0.000006  min_lr: 0.000006  loss: 2.8831 (2.7272)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7367 (1.7222)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [293]  [1600/2502]  eta: 0:05:02  lr: 0.000006  min_lr: 0.000006  loss: 2.9031 (2.7347)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7128 (1.7257)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [293]  [1800/2502]  eta: 0:03:55  lr: 0.000006  min_lr: 0.000006  loss: 2.8259 (2.7335)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6678 (1.7236)  time: 0.3343  data: 0.0004  max mem: 27255
Epoch: [293]  [2000/2502]  eta: 0:02:48  lr: 0.000006  min_lr: 0.000006  loss: 2.9098 (2.7371)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6535 (1.7315)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [293]  [2200/2502]  eta: 0:01:41  lr: 0.000006  min_lr: 0.000006  loss: 2.7476 (2.7332)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6980 (1.7321)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [293]  [2400/2502]  eta: 0:00:34  lr: 0.000006  min_lr: 0.000006  loss: 2.9107 (2.7395)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7036 (1.7403)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [293]  [2501/2502]  eta: 0:00:00  lr: 0.000006  min_lr: 0.000006  loss: 2.6605 (2.7361)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7474 (1.7430)  time: 0.3005  data: 0.0009  max mem: 27255
Epoch: [293] Total time: 0:13:58 (0.3352 s / it)
Averaged stats: lr: 0.000006  min_lr: 0.000006  loss: 2.6605 (2.7291)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7474 (1.7430)
Test:  [ 0/50]  eta: 0:02:29  loss: 0.4522 (0.4522)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 2.9926  data: 2.7919  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7041 (0.7026)  acc1: 90.8000 (88.7636)  acc5: 98.8000 (98.0727)  time: 0.4785  data: 0.3096  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7223 (0.7321)  acc1: 85.6000 (87.4286)  acc5: 98.0000 (98.0191)  time: 0.2186  data: 0.0533  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.8679 (0.8191)  acc1: 82.8000 (85.5097)  acc5: 96.4000 (97.0839)  time: 0.2148  data: 0.0504  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0181 (0.8674)  acc1: 79.6000 (84.0488)  acc5: 94.8000 (96.6634)  time: 0.2079  data: 0.0440  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0181 (0.8888)  acc1: 79.2000 (83.3440)  acc5: 95.2000 (96.5840)  time: 0.2000  data: 0.0363  max mem: 27255
Test: Total time: 0:00:13 (0.2615 s / it)
* Acc@1 83.672 Acc@5 96.658 loss 0.882
Accuracy of the model on the 50000 test images: 83.7%
Max accuracy: 83.69%
Epoch: [294]  [   0/2502]  eta: 1:34:25  lr: 0.000006  min_lr: 0.000006  loss: 2.0071 (2.0071)  weight_decay: 0.0500 (0.0500)  time: 2.2642  data: 1.4053  max mem: 27255
Epoch: [294]  [ 200/2502]  eta: 0:13:11  lr: 0.000005  min_lr: 0.000005  loss: 2.9551 (2.7113)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5959 (1.6882)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [294]  [ 400/2502]  eta: 0:11:53  lr: 0.000005  min_lr: 0.000005  loss: 2.8151 (2.7388)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8656 (1.7312)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [294]  [ 600/2502]  eta: 0:10:42  lr: 0.000005  min_lr: 0.000005  loss: 2.8540 (2.7381)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6643 (1.7455)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [294]  [ 800/2502]  eta: 0:09:33  lr: 0.000005  min_lr: 0.000005  loss: 2.9746 (2.7365)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8453 (1.7650)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [294]  [1000/2502]  eta: 0:08:25  lr: 0.000005  min_lr: 0.000005  loss: 2.6730 (2.7240)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7147 (1.7537)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [294]  [1200/2502]  eta: 0:07:17  lr: 0.000005  min_lr: 0.000005  loss: 2.8534 (2.7176)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7060 (1.7444)  time: 0.3341  data: 0.0005  max mem: 27255
Epoch: [294]  [1400/2502]  eta: 0:06:10  lr: 0.000005  min_lr: 0.000005  loss: 2.9524 (2.7230)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8097 (1.7438)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [294]  [1600/2502]  eta: 0:05:02  lr: 0.000005  min_lr: 0.000005  loss: 2.7244 (2.7208)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5909 (1.7359)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [294]  [1800/2502]  eta: 0:03:55  lr: 0.000005  min_lr: 0.000005  loss: 2.8341 (2.7228)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6745 (1.7318)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [294]  [2000/2502]  eta: 0:02:48  lr: 0.000004  min_lr: 0.000004  loss: 2.9404 (2.7244)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6556 (1.7335)  time: 0.3354  data: 0.0004  max mem: 27255
Epoch: [294]  [2200/2502]  eta: 0:01:41  lr: 0.000004  min_lr: 0.000004  loss: 2.7986 (2.7204)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7017 (1.7365)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [294]  [2400/2502]  eta: 0:00:34  lr: 0.000004  min_lr: 0.000004  loss: 2.5567 (2.7173)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5926 (1.7324)  time: 0.3377  data: 0.0004  max mem: 27255
Epoch: [294]  [2501/2502]  eta: 0:00:00  lr: 0.000004  min_lr: 0.000004  loss: 2.7381 (2.7158)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7352 (1.7320)  time: 0.3014  data: 0.0009  max mem: 27255
Epoch: [294] Total time: 0:13:58 (0.3350 s / it)
Averaged stats: lr: 0.000004  min_lr: 0.000004  loss: 2.7381 (2.7193)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7352 (1.7320)
Test:  [ 0/50]  eta: 0:03:04  loss: 0.5278 (0.5278)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.6879  data: 3.4896  max mem: 27255
Test:  [10/50]  eta: 0:00:20  loss: 0.7826 (0.7859)  acc1: 89.2000 (88.6546)  acc5: 98.8000 (98.0727)  time: 0.5082  data: 0.3411  max mem: 27255
Test:  [20/50]  eta: 0:00:11  loss: 0.7887 (0.8143)  acc1: 85.6000 (87.4095)  acc5: 98.0000 (98.0191)  time: 0.2211  data: 0.0572  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9484 (0.9022)  acc1: 83.2000 (85.5097)  acc5: 96.4000 (97.0710)  time: 0.2105  data: 0.0466  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1074 (0.9502)  acc1: 80.0000 (84.0683)  acc5: 94.8000 (96.6537)  time: 0.1721  data: 0.0037  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1074 (0.9685)  acc1: 78.4000 (83.3200)  acc5: 95.6000 (96.6080)  time: 0.1767  data: 0.0084  max mem: 27255
Test: Total time: 0:00:13 (0.2647 s / it)
* Acc@1 83.676 Acc@5 96.694 loss 0.962
Accuracy of the model on the 50000 test images: 83.7%
Max accuracy: 83.69%
Epoch: [295]  [   0/2502]  eta: 1:24:28  lr: 0.000004  min_lr: 0.000004  loss: 2.8970 (2.8970)  weight_decay: 0.0500 (0.0500)  time: 2.0258  data: 1.6513  max mem: 27255
Epoch: [295]  [ 200/2502]  eta: 0:13:08  lr: 0.000004  min_lr: 0.000004  loss: 2.7291 (2.7459)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7286 (1.7817)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [295]  [ 400/2502]  eta: 0:11:51  lr: 0.000004  min_lr: 0.000004  loss: 2.6342 (2.7266)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5590 (1.7524)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [295]  [ 600/2502]  eta: 0:10:40  lr: 0.000004  min_lr: 0.000004  loss: 2.7345 (2.7209)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7096 (1.7620)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [295]  [ 800/2502]  eta: 0:09:32  lr: 0.000004  min_lr: 0.000004  loss: 2.8243 (2.7197)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6690 (1.7597)  time: 0.3333  data: 0.0004  max mem: 27255
Epoch: [295]  [1000/2502]  eta: 0:08:24  lr: 0.000004  min_lr: 0.000004  loss: 2.9748 (2.7145)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7168 (1.7538)  time: 0.3346  data: 0.0005  max mem: 27255
Epoch: [295]  [1200/2502]  eta: 0:07:17  lr: 0.000004  min_lr: 0.000004  loss: 2.6781 (2.7115)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7777 (1.7495)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [295]  [1400/2502]  eta: 0:06:10  lr: 0.000003  min_lr: 0.000003  loss: 2.7707 (2.7114)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7918 (1.7523)  time: 0.3376  data: 0.0004  max mem: 27255
Epoch: [295]  [1600/2502]  eta: 0:05:02  lr: 0.000003  min_lr: 0.000003  loss: 3.0044 (2.7190)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6417 (1.7515)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [295]  [1800/2502]  eta: 0:03:55  lr: 0.000003  min_lr: 0.000003  loss: 2.9279 (2.7246)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8550 (inf)  time: 0.3329  data: 0.0004  max mem: 27255
Epoch: [295]  [2000/2502]  eta: 0:02:48  lr: 0.000003  min_lr: 0.000003  loss: 2.8516 (2.7212)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6607 (inf)  time: 0.3338  data: 0.0003  max mem: 27255
Epoch: [295]  [2200/2502]  eta: 0:01:41  lr: 0.000003  min_lr: 0.000003  loss: 2.7540 (2.7210)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6205 (inf)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [295]  [2400/2502]  eta: 0:00:34  lr: 0.000003  min_lr: 0.000003  loss: 2.8073 (2.7239)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7195 (inf)  time: 0.3412  data: 0.0004  max mem: 27255
Epoch: [295]  [2501/2502]  eta: 0:00:00  lr: 0.000003  min_lr: 0.000003  loss: 2.8913 (2.7239)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6316 (inf)  time: 0.3044  data: 0.0007  max mem: 27255
Epoch: [295] Total time: 0:13:58 (0.3353 s / it)
Averaged stats: lr: 0.000003  min_lr: 0.000003  loss: 2.8913 (2.7319)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6316 (inf)
Test:  [ 0/50]  eta: 0:02:49  loss: 0.5363 (0.5363)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.3932  data: 3.1938  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7826 (0.7874)  acc1: 90.4000 (88.6546)  acc5: 98.8000 (98.1091)  time: 0.4602  data: 0.2923  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.8025 (0.8149)  acc1: 85.6000 (87.3905)  acc5: 98.0000 (98.0762)  time: 0.1962  data: 0.0315  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9533 (0.9057)  acc1: 82.4000 (85.4323)  acc5: 96.4000 (97.0839)  time: 0.2309  data: 0.0655  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1168 (0.9555)  acc1: 79.6000 (83.9317)  acc5: 94.8000 (96.6439)  time: 0.2049  data: 0.0400  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1168 (0.9760)  acc1: 78.8000 (83.2480)  acc5: 95.6000 (96.5680)  time: 0.2035  data: 0.0398  max mem: 27255
Test: Total time: 0:00:12 (0.2596 s / it)
* Acc@1 83.656 Acc@5 96.638 loss 0.971
Accuracy of the model on the 50000 test images: 83.7%
Max accuracy: 83.69%
Epoch: [296]  [   0/2502]  eta: 1:34:48  lr: 0.000003  min_lr: 0.000003  loss: 2.9857 (2.9857)  weight_decay: 0.0500 (0.0500)  time: 2.2735  data: 1.4294  max mem: 27255
Epoch: [296]  [ 200/2502]  eta: 0:13:11  lr: 0.000003  min_lr: 0.000003  loss: 3.0003 (2.7217)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7534 (1.7297)  time: 0.3335  data: 0.0004  max mem: 27255
Epoch: [296]  [ 400/2502]  eta: 0:11:53  lr: 0.000003  min_lr: 0.000003  loss: 2.7923 (2.7176)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6472 (1.7110)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [296]  [ 600/2502]  eta: 0:10:42  lr: 0.000003  min_lr: 0.000003  loss: 2.7375 (2.7085)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6515 (1.7276)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [296]  [ 800/2502]  eta: 0:09:33  lr: 0.000003  min_lr: 0.000003  loss: 2.7438 (2.7096)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6435 (1.7316)  time: 0.3347  data: 0.0005  max mem: 27255
Epoch: [296]  [1000/2502]  eta: 0:08:25  lr: 0.000003  min_lr: 0.000003  loss: 2.8811 (2.7155)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6807 (1.7360)  time: 0.3336  data: 0.0005  max mem: 27255
Epoch: [296]  [1200/2502]  eta: 0:07:17  lr: 0.000003  min_lr: 0.000003  loss: 2.7457 (2.7125)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6067 (1.7326)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [296]  [1400/2502]  eta: 0:06:10  lr: 0.000002  min_lr: 0.000002  loss: 2.8910 (2.7122)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8027 (1.7425)  time: 0.3348  data: 0.0005  max mem: 27255
Epoch: [296]  [1600/2502]  eta: 0:05:02  lr: 0.000002  min_lr: 0.000002  loss: 2.8248 (2.7113)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5849 (1.7385)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [296]  [1800/2502]  eta: 0:03:55  lr: 0.000002  min_lr: 0.000002  loss: 2.9666 (2.7167)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8068 (1.7514)  time: 0.3337  data: 0.0005  max mem: 27255
Epoch: [296]  [2000/2502]  eta: 0:02:48  lr: 0.000002  min_lr: 0.000002  loss: 2.9354 (2.7180)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6193 (1.7453)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [296]  [2200/2502]  eta: 0:01:41  lr: 0.000002  min_lr: 0.000002  loss: 2.8552 (2.7219)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5898 (1.7469)  time: 0.3341  data: 0.0004  max mem: 27255
Epoch: [296]  [2400/2502]  eta: 0:00:34  lr: 0.000002  min_lr: 0.000002  loss: 2.8762 (2.7214)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7662 (1.7491)  time: 0.3331  data: 0.0005  max mem: 27255
Epoch: [296]  [2501/2502]  eta: 0:00:00  lr: 0.000002  min_lr: 0.000002  loss: 2.8172 (2.7222)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5739 (1.7484)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [296] Total time: 0:13:58 (0.3351 s / it)
Averaged stats: lr: 0.000002  min_lr: 0.000002  loss: 2.8172 (2.7269)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5739 (1.7484)
Test:  [ 0/50]  eta: 0:02:28  loss: 0.4897 (0.4897)  acc1: 94.8000 (94.8000)  acc5: 99.2000 (99.2000)  time: 2.9682  data: 2.7679  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.7522 (0.7486)  acc1: 90.4000 (88.8727)  acc5: 98.8000 (98.1091)  time: 0.4193  data: 0.2521  max mem: 27255
Test:  [20/50]  eta: 0:00:08  loss: 0.7594 (0.7777)  acc1: 85.6000 (87.4857)  acc5: 98.0000 (98.0571)  time: 0.1643  data: 0.0005  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.9153 (0.8678)  acc1: 82.8000 (85.5742)  acc5: 96.8000 (97.0839)  time: 0.1644  data: 0.0005  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0817 (0.9168)  acc1: 80.0000 (84.1268)  acc5: 94.8000 (96.6829)  time: 0.1782  data: 0.0143  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0817 (0.9360)  acc1: 79.2000 (83.4240)  acc5: 95.2000 (96.6320)  time: 0.1785  data: 0.0141  max mem: 27255
Test: Total time: 0:00:11 (0.2284 s / it)
* Acc@1 83.712 Acc@5 96.684 loss 0.930
Accuracy of the model on the 50000 test images: 83.7%
Max accuracy: 83.71%
Epoch: [297]  [   0/2502]  eta: 2:16:21  lr: 0.000002  min_lr: 0.000002  loss: 1.8959 (1.8959)  weight_decay: 0.0500 (0.0500)  time: 3.2698  data: 2.8803  max mem: 27255
Epoch: [297]  [ 200/2502]  eta: 0:13:29  lr: 0.000002  min_lr: 0.000002  loss: 2.7928 (2.7345)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6609 (1.7349)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [297]  [ 400/2502]  eta: 0:12:01  lr: 0.000002  min_lr: 0.000002  loss: 2.8352 (2.7380)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6623 (1.7476)  time: 0.3339  data: 0.0005  max mem: 27255
Epoch: [297]  [ 600/2502]  eta: 0:10:47  lr: 0.000002  min_lr: 0.000002  loss: 2.5071 (2.7193)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6493 (1.7369)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [297]  [ 800/2502]  eta: 0:09:36  lr: 0.000002  min_lr: 0.000002  loss: 2.8961 (2.7264)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6966 (1.7436)  time: 0.3348  data: 0.0004  max mem: 27255
Epoch: [297]  [1000/2502]  eta: 0:08:27  lr: 0.000002  min_lr: 0.000002  loss: 2.7399 (2.7248)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5468 (1.7328)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [297]  [1200/2502]  eta: 0:07:19  lr: 0.000002  min_lr: 0.000002  loss: 2.8571 (2.7224)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.4896 (1.7164)  time: 0.3342  data: 0.0004  max mem: 27255
Epoch: [297]  [1400/2502]  eta: 0:06:11  lr: 0.000002  min_lr: 0.000002  loss: 2.8218 (2.7231)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5869 (1.7173)  time: 0.3346  data: 0.0004  max mem: 27255
Epoch: [297]  [1600/2502]  eta: 0:05:03  lr: 0.000002  min_lr: 0.000002  loss: 2.6353 (2.7206)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7098 (1.7215)  time: 0.3338  data: 0.0005  max mem: 27255
Epoch: [297]  [1800/2502]  eta: 0:03:56  lr: 0.000002  min_lr: 0.000002  loss: 2.7997 (2.7227)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6362 (1.7218)  time: 0.3350  data: 0.0004  max mem: 27255
Epoch: [297]  [2000/2502]  eta: 0:02:48  lr: 0.000002  min_lr: 0.000002  loss: 2.9061 (2.7216)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7693 (1.7203)  time: 0.3355  data: 0.0005  max mem: 27255
Epoch: [297]  [2200/2502]  eta: 0:01:41  lr: 0.000002  min_lr: 0.000002  loss: 2.6116 (2.7194)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6044 (1.7223)  time: 0.3332  data: 0.0004  max mem: 27255
Epoch: [297]  [2400/2502]  eta: 0:00:34  lr: 0.000002  min_lr: 0.000002  loss: 2.5993 (2.7213)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6670 (1.7224)  time: 0.3340  data: 0.0004  max mem: 27255
Epoch: [297]  [2501/2502]  eta: 0:00:00  lr: 0.000002  min_lr: 0.000002  loss: 2.8049 (2.7204)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6533 (1.7204)  time: 0.3033  data: 0.0008  max mem: 27255
Epoch: [297] Total time: 0:14:00 (0.3360 s / it)
Averaged stats: lr: 0.000002  min_lr: 0.000002  loss: 2.8049 (2.7206)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6533 (1.7204)
Test:  [ 0/50]  eta: 0:02:24  loss: 0.5960 (0.5960)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.8829  data: 2.6778  max mem: 27255
Test:  [10/50]  eta: 0:00:16  loss: 0.8659 (0.8516)  acc1: 89.6000 (88.5818)  acc5: 98.4000 (98.0000)  time: 0.4117  data: 0.2439  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.8705 (0.8789)  acc1: 85.6000 (87.2571)  acc5: 98.4000 (97.9619)  time: 0.1903  data: 0.0238  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 1.0249 (0.9694)  acc1: 82.4000 (85.3290)  acc5: 96.4000 (96.8903)  time: 0.2553  data: 0.0889  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.1795 (1.0182)  acc1: 80.0000 (83.9220)  acc5: 94.8000 (96.4683)  time: 0.2294  data: 0.0653  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.1795 (1.0385)  acc1: 79.2000 (83.2160)  acc5: 95.2000 (96.4560)  time: 0.1640  data: 0.0001  max mem: 27255
Test: Total time: 0:00:12 (0.2569 s / it)
* Acc@1 83.492 Acc@5 96.590 loss 1.031
Accuracy of the model on the 50000 test images: 83.5%
Max accuracy: 83.71%
Epoch: [298]  [   0/2502]  eta: 1:31:16  lr: 0.000002  min_lr: 0.000002  loss: 2.9511 (2.9511)  weight_decay: 0.0500 (0.0500)  time: 2.1889  data: 1.7141  max mem: 27255
Epoch: [298]  [ 200/2502]  eta: 0:13:10  lr: 0.000001  min_lr: 0.000001  loss: 2.9143 (2.7209)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.8465 (1.8385)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [298]  [ 400/2502]  eta: 0:11:53  lr: 0.000001  min_lr: 0.000001  loss: 2.8838 (2.7288)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5848 (1.7669)  time: 0.3330  data: 0.0004  max mem: 27255
Epoch: [298]  [ 600/2502]  eta: 0:10:42  lr: 0.000001  min_lr: 0.000001  loss: 2.8575 (2.7235)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6078 (1.7551)  time: 0.3344  data: 0.0005  max mem: 27255
Epoch: [298]  [ 800/2502]  eta: 0:09:33  lr: 0.000001  min_lr: 0.000001  loss: 2.8535 (2.7263)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7362 (1.7636)  time: 0.3354  data: 0.0005  max mem: 27255
Epoch: [298]  [1000/2502]  eta: 0:08:25  lr: 0.000001  min_lr: 0.000001  loss: 2.6645 (2.7160)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6718 (1.7493)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [298]  [1200/2502]  eta: 0:07:18  lr: 0.000001  min_lr: 0.000001  loss: 2.3637 (2.7046)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6536 (1.7425)  time: 0.3360  data: 0.0004  max mem: 27255
Epoch: [298]  [1400/2502]  eta: 0:06:10  lr: 0.000001  min_lr: 0.000001  loss: 2.5813 (2.7092)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6896 (1.7429)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [298]  [1600/2502]  eta: 0:05:03  lr: 0.000001  min_lr: 0.000001  loss: 2.7348 (2.7122)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7114 (1.7544)  time: 0.3357  data: 0.0004  max mem: 27255
Epoch: [298]  [1800/2502]  eta: 0:03:56  lr: 0.000001  min_lr: 0.000001  loss: 2.4553 (2.7093)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7307 (1.7569)  time: 0.3344  data: 0.0004  max mem: 27255
Epoch: [298]  [2000/2502]  eta: 0:02:48  lr: 0.000001  min_lr: 0.000001  loss: 2.9713 (2.7103)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6830 (1.7572)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [298]  [2200/2502]  eta: 0:01:41  lr: 0.000001  min_lr: 0.000001  loss: 2.7934 (2.7111)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7404 (1.7527)  time: 0.3349  data: 0.0004  max mem: 27255
Epoch: [298]  [2400/2502]  eta: 0:00:34  lr: 0.000001  min_lr: 0.000001  loss: 2.6763 (2.7113)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7939 (1.7505)  time: 0.3339  data: 0.0004  max mem: 27255
Epoch: [298]  [2501/2502]  eta: 0:00:00  lr: 0.000001  min_lr: 0.000001  loss: 2.7610 (2.7126)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7828 (1.7490)  time: 0.3009  data: 0.0009  max mem: 27255
Epoch: [298] Total time: 0:14:00 (0.3357 s / it)
Averaged stats: lr: 0.000001  min_lr: 0.000001  loss: 2.7610 (2.7229)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7828 (1.7490)
Test:  [ 0/50]  eta: 0:02:06  loss: 0.5172 (0.5172)  acc1: 94.4000 (94.4000)  acc5: 99.2000 (99.2000)  time: 2.5350  data: 2.3382  max mem: 27255
Test:  [10/50]  eta: 0:00:19  loss: 0.7707 (0.7701)  acc1: 90.4000 (88.8000)  acc5: 98.8000 (98.1455)  time: 0.4788  data: 0.3009  max mem: 27255
Test:  [20/50]  eta: 0:00:10  loss: 0.7835 (0.7983)  acc1: 85.6000 (87.4476)  acc5: 98.0000 (98.0571)  time: 0.2241  data: 0.0489  max mem: 27255
Test:  [30/50]  eta: 0:00:06  loss: 0.9348 (0.8845)  acc1: 82.8000 (85.5097)  acc5: 96.4000 (97.0710)  time: 0.2101  data: 0.0408  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0893 (0.9322)  acc1: 79.2000 (83.9902)  acc5: 94.8000 (96.6634)  time: 0.2447  data: 0.0808  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0893 (0.9502)  acc1: 78.8000 (83.3440)  acc5: 95.2000 (96.6080)  time: 0.2041  data: 0.0404  max mem: 27255
Test: Total time: 0:00:13 (0.2695 s / it)
* Acc@1 83.632 Acc@5 96.650 loss 0.944
Accuracy of the model on the 50000 test images: 83.6%
Max accuracy: 83.71%
Epoch: [299]  [   0/2502]  eta: 1:29:34  lr: 0.000001  min_lr: 0.000001  loss: 2.3161 (2.3161)  weight_decay: 0.0500 (0.0500)  time: 2.1483  data: 1.3345  max mem: 27255
Epoch: [299]  [ 200/2502]  eta: 0:13:08  lr: 0.000001  min_lr: 0.000001  loss: 2.7632 (2.7503)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5826 (1.7293)  time: 0.3343  data: 0.0003  max mem: 27255
Epoch: [299]  [ 400/2502]  eta: 0:11:50  lr: 0.000001  min_lr: 0.000001  loss: 2.9463 (2.7548)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6304 (1.7232)  time: 0.3347  data: 0.0004  max mem: 27255
Epoch: [299]  [ 600/2502]  eta: 0:10:41  lr: 0.000001  min_lr: 0.000001  loss: 2.8659 (2.7546)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.5789 (1.7243)  time: 0.3336  data: 0.0004  max mem: 27255
Epoch: [299]  [ 800/2502]  eta: 0:09:32  lr: 0.000001  min_lr: 0.000001  loss: 2.7504 (2.7489)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6129 (1.7195)  time: 0.3334  data: 0.0003  max mem: 27255
Epoch: [299]  [1000/2502]  eta: 0:08:24  lr: 0.000001  min_lr: 0.000001  loss: 2.7142 (2.7545)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6841 (1.7175)  time: 0.3324  data: 0.0003  max mem: 27255
Epoch: [299]  [1200/2502]  eta: 0:07:16  lr: 0.000001  min_lr: 0.000001  loss: 2.8757 (2.7587)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7743 (1.7153)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [299]  [1400/2502]  eta: 0:06:09  lr: 0.000001  min_lr: 0.000001  loss: 2.9873 (2.7576)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7746 (1.7191)  time: 0.3334  data: 0.0004  max mem: 27255
Epoch: [299]  [1600/2502]  eta: 0:05:02  lr: 0.000001  min_lr: 0.000001  loss: 2.8702 (2.7531)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6012 (1.7188)  time: 0.3331  data: 0.0004  max mem: 27255
Epoch: [299]  [1800/2502]  eta: 0:03:55  lr: 0.000001  min_lr: 0.000001  loss: 2.9413 (2.7509)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6611 (1.7178)  time: 0.3426  data: 0.0004  max mem: 27255
Epoch: [299]  [2000/2502]  eta: 0:02:48  lr: 0.000001  min_lr: 0.000001  loss: 2.8329 (2.7456)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7562 (1.7209)  time: 0.3337  data: 0.0004  max mem: 27255
Epoch: [299]  [2200/2502]  eta: 0:01:41  lr: 0.000001  min_lr: 0.000001  loss: 2.9230 (2.7397)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.7584 (1.7246)  time: 0.3351  data: 0.0004  max mem: 27255
Epoch: [299]  [2400/2502]  eta: 0:00:34  lr: 0.000001  min_lr: 0.000001  loss: 2.8941 (2.7398)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6168 (1.7234)  time: 0.3326  data: 0.0003  max mem: 27255
Epoch: [299]  [2501/2502]  eta: 0:00:00  lr: 0.000001  min_lr: 0.000001  loss: 2.7839 (2.7359)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6433 (1.7223)  time: 0.3002  data: 0.0009  max mem: 27255
Epoch: [299] Total time: 0:13:57 (0.3348 s / it)
Averaged stats: lr: 0.000001  min_lr: 0.000001  loss: 2.7839 (2.7287)  weight_decay: 0.0500 (0.0500)  grad_norm: 1.6433 (1.7223)
Test:  [ 0/50]  eta: 0:02:48  loss: 0.4781 (0.4781)  acc1: 94.0000 (94.0000)  acc5: 99.2000 (99.2000)  time: 3.3611  data: 3.1592  max mem: 27255
Test:  [10/50]  eta: 0:00:18  loss: 0.7331 (0.7299)  acc1: 90.0000 (88.6909)  acc5: 98.4000 (98.1455)  time: 0.4556  data: 0.2877  max mem: 27255
Test:  [20/50]  eta: 0:00:09  loss: 0.7349 (0.7568)  acc1: 85.6000 (87.3714)  acc5: 98.0000 (98.0381)  time: 0.1819  data: 0.0176  max mem: 27255
Test:  [30/50]  eta: 0:00:05  loss: 0.8982 (0.8466)  acc1: 83.2000 (85.4968)  acc5: 96.4000 (97.0065)  time: 0.1853  data: 0.0215  max mem: 27255
Test:  [40/50]  eta: 0:00:02  loss: 1.0565 (0.8951)  acc1: 80.0000 (84.0488)  acc5: 94.8000 (96.5951)  time: 0.1743  data: 0.0105  max mem: 27255
Test:  [49/50]  eta: 0:00:00  loss: 1.0565 (0.9142)  acc1: 79.2000 (83.3440)  acc5: 95.6000 (96.5600)  time: 0.1740  data: 0.0104  max mem: 27255
Test: Total time: 0:00:12 (0.2421 s / it)
* Acc@1 83.704 Acc@5 96.664 loss 0.908
Accuracy of the model on the 50000 test images: 83.7%
Max accuracy: 83.71%
Training time 2 days, 23:03:50
